[PATCH 3/4] powerpc ppc-opcode: add opcodes for vsx vector paired instructions

2020-06-29 Thread Balamuruhan S
Add instruction opcodes for the new VSX vector paired instructions:
* Load VSX Vector Paired (lxvp)
* Load VSX Vector Paired Indexed (lxvpx)
* Store VSX Vector Paired (stxvp)
* Store VSX Vector Paired Indexed (stxvpx)

Signed-off-by: Balamuruhan S 
---
 arch/powerpc/include/asm/ppc-opcode.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 2a39c716c343..558efd25683b 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -210,6 +210,10 @@
 #define PPC_INST_ISEL  0x7c00001e
 #define PPC_INST_ISEL_MASK 0xfc00003e
 #define PPC_INST_LDARX 0x7c0000a8
+#define PPC_INST_LXVP  0x18000000
+#define PPC_INST_LXVPX 0x7c00029a
+#define PPC_INST_STXVP 0x18000001
+#define PPC_INST_STXVPX 0x7c00039a
 #define PPC_INST_STDCX 0x7c0001ad
 #define PPC_INST_LQARX 0x7c000228
 #define PPC_INST_STQCX 0x7c00016d
-- 
2.24.1
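
(Aside, not part of the patch: the new values can be cross-checked against the
ISA encodings. lxvp/stxvp are DQ-form, primary opcode 6, with a 4-bit
load/store selector in the low bits; lxvpx/stxvpx are X-form, primary opcode
31, with extended opcodes 333 and 461, matching the analyse_instr() cases in
patch 1. A minimal user-space sanity check of that reading:

#include <assert.h>
#include <stdio.h>

#define PPC_INST_LXVP	0x18000000
#define PPC_INST_LXVPX	0x7c00029a
#define PPC_INST_STXVP	0x18000001
#define PPC_INST_STXVPX	0x7c00039a

int main(void)
{
	/* lxvp/stxvp: DQ-form, primary opcode 6, low 4 bits 0 = load, 1 = store */
	assert(PPC_INST_LXVP >> 26 == 6 && (PPC_INST_LXVP & 0xf) == 0);
	assert(PPC_INST_STXVP >> 26 == 6 && (PPC_INST_STXVP & 0xf) == 1);
	/* lxvpx/stxvpx: X-form, primary opcode 31, XO in bits 21-30 */
	assert(PPC_INST_LXVPX >> 26 == 31 && ((PPC_INST_LXVPX >> 1) & 0x3ff) == 333);
	assert(PPC_INST_STXVPX >> 26 == 31 && ((PPC_INST_STXVPX >> 1) & 0x3ff) == 461);
	printf("vector paired opcodes are self-consistent\n");
	return 0;
})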



[PATCH 2/4] powerpc/sstep: support emulation for vsx vector paired storage access instructions

2020-06-29 Thread Balamuruhan S
Add emulate_step() changes to support VSX vector paired storage
access instructions that provide octword operand loads/stores
between storage and a set of 64 Vector-Scalar Registers (VSRs).

Signed-off-by: Balamuruhan S 
---
 arch/powerpc/include/asm/sstep.h |  2 +-
 arch/powerpc/lib/sstep.c | 58 +++-
 2 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 3b01c69a44aa..a6c0b299bcc9 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -126,7 +126,7 @@ union vsx_reg {
unsigned long d[2];
float   fp[4];
double  dp[2];
-   __vector128 v;
+   __vector128 v[2];
 };
 
 /*
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c92890e71ca7..74c730cae7d8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -279,6 +279,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
up[1] = tmp;
break;
}
+   case 32: {
+   unsigned long *up = (unsigned long *)ptr;
+   unsigned long tmp;
+
+   tmp = byterev_8(up[0]);
+   up[0] = byterev_8(up[3]);
+   up[3] = tmp;
+   tmp = byterev_8(up[2]);
+   up[2] = byterev_8(up[1]);
+   up[1] = tmp;
+   break;
+   }
+
 #endif
default:
WARN_ON_ONCE(1);
@@ -709,6 +722,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
reg->d[0] = reg->d[1] = 0;
 
switch (op->element_size) {
+   case 32:
+   /* [p]lxvp[x] */
case 16:
/* whole vector; lxv[x] or lxvl[l] */
if (size == 0)
@@ -717,7 +732,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
rev = !rev;
if (rev)
-   do_byte_reverse(reg, 16);
+   do_byte_reverse(reg, size);
break;
case 8:
/* scalar loads, lxvd2x, lxvdsx */
@@ -793,6 +808,22 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
size = GETSIZE(op->type);
 
switch (op->element_size) {
+   case 32:
+   /* [p]stxvp[x] */
+   if (size == 0)
+   break;
+   if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+   rev = !rev;
+   if (rev) {
+   /* reverse 32 bytes */
+   buf.d[0] = byterev_8(reg->d[3]);
+   buf.d[1] = byterev_8(reg->d[2]);
+   buf.d[2] = byterev_8(reg->d[1]);
+   buf.d[3] = byterev_8(reg->d[0]);
+   reg = &buf;
+   }
+   memcpy(mem, reg, size);
+   break;
case 16:
/* stxv, stxvx, stxvl, stxvll */
if (size == 0)
@@ -861,28 +892,33 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
   bool cross_endian)
 {
int reg = op->reg;
-   u8 mem[16];
+   int i, nr_vsx_regs;
+   u8 mem[32];
union vsx_reg buf;
int size = GETSIZE(op->type);
 
if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
return -EFAULT;
 
+   nr_vsx_regs = size / sizeof(__vector128);
emulate_vsx_load(op, &buf, mem, cross_endian);
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
if (regs->msr & MSR_FP) {
-   load_vsrn(reg, &buf);
+   for (i = 0; i < nr_vsx_regs; i++)
+   load_vsrn(reg + i, &buf.v[i]);
} else {
current->thread.fp_state.fpr[reg][0] = buf.d[0];
current->thread.fp_state.fpr[reg][1] = buf.d[1];
}
} else {
if (regs->msr & MSR_VEC)
-   load_vsrn(reg, &buf);
+   for (i = 0; i < nr_vsx_regs; i++)
+   load_vsrn(reg + i, &buf.v[i]);
+
else
-   current->thread.vr_state.vr[reg - 32] = buf.v;
+   current->thread.vr_state.vr[reg - 32] = buf.v[0];
}
preempt_enable();
return 0;
@@ -893,27 +929,31 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
bool cross_endian)
 {
int reg = op->reg;
-   u8 mem[16];
+   int i, nr_vsx_regs;
+   u8 mem[32];
union vsx_reg buf;
int size = GETSIZE(op->type);
 
if (!address_ok(regs, ea, size))
return -EFAULT;
 
+   nr_vsx_regs = size / sizeof(__vector128);
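
(The message is truncated here in the archive; the remainder of the hunk
mirrors do_vsx_load() above. As a stand-alone illustration of the new case 32
in do_byte_reverse(): the 32-byte operand is treated as four doublewords,
each is byte-swapped and their order is swapped, which reverses all 32 bytes.
A minimal sketch, with __builtin_bswap64() standing in for the kernel's
byterev_8():

#include <stdint.h>
#include <stdio.h>

static void byte_reverse_32(void *ptr)
{
	uint64_t *up = ptr;
	uint64_t tmp;

	/* swap (and byte-swap) the outer pair, then the inner pair */
	tmp = __builtin_bswap64(up[0]);
	up[0] = __builtin_bswap64(up[3]);
	up[3] = tmp;
	tmp = __builtin_bswap64(up[2]);
	up[2] = __builtin_bswap64(up[1]);
	up[1] = tmp;
}

int main(void)
{
	uint8_t buf[32];
	int i;

	for (i = 0; i < 32; i++)
		buf[i] = i;
	byte_reverse_32(buf);
	for (i = 0; i < 32; i++)	/* prints 1f 1e ... 01 00 */
		printf("%02x%s", buf[i], (i % 16 == 15) ? "\n" : " ");
	return 0;
})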

[PATCH 0/4] VSX 32-byte vector paired load/store instructions

2020-06-29 Thread Balamuruhan S
VSX vector paired instructions operate on an octword (32-byte) operand
for loads and stores between storage and a pair of two sequential Vector-Scalar
Registers (VSRs). There are 4 word instructions and 2 prefixed instructions
that provide these 32-byte storage access operations - lxvp, lxvpx, stxvp,
stxvpx, plxvp, pstxvp.

The emulation infrastructure doesn't support these instructions yet: it can
neither perform 32-byte storage accesses nor operate on two VSX registers at
once. This patch series enables instruction emulation support and adds test
cases for it.

Balamuruhan S (4):
  powerpc/sstep: support new VSX vector paired storage access
instructions
  powerpc/sstep: support emulation for vsx vector paired storage access
instructions
  powerpc ppc-opcode: add opcodes for vsx vector paired instructions
  powerpc sstep: add testcases for vsx load/store instructions

 arch/powerpc/include/asm/ppc-opcode.h |  11 ++
 arch/powerpc/include/asm/sstep.h  |   2 +-
 arch/powerpc/lib/sstep.c  | 102 +-
 arch/powerpc/lib/test_emulate_step.c  | 273 ++
 4 files changed, 378 insertions(+), 10 deletions(-)


base-commit: 6469e8962c20b580b471790fe42367750599
-- 
2.24.1



[PATCH 1/4] powerpc/sstep: support new VSX vector paired storage access instructions

2020-06-29 Thread Balamuruhan S
VSX Vector Paired instructions load/store an octword (32 bytes)
between storage and two sequential VSRs. Add `analyse_instr()` support
for these new instructions:
* Load VSX Vector Paired (lxvp)
* Load VSX Vector Paired Indexed (lxvpx)
* Prefixed Load VSX Vector Paired (plxvp)
* Store VSX Vector Paired (stxvp)
* Store VSX Vector Paired Indexed (stxvpx)
* Prefixed Store VSX Vector Paired (pstxvp)

Signed-off-by: Balamuruhan S 
---
 arch/powerpc/lib/sstep.c | 44 
 1 file changed, 44 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 5abe98216dc2..c92890e71ca7 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -31,6 +31,10 @@ extern char system_call_common[];
 #define XER_OV32   0x00080000U
 #define XER_CA32   0x00040000U
 
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd)   ((((rd) & 1) << 5) | ((rd) & 0xfe))
+#endif
+
 #ifdef CONFIG_PPC_FPU
 /*
  * Functions in ldstfp.S
@@ -2382,6 +2386,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->vsx_flags = VSX_SPLAT;
break;
 
+   case 333:   /* lxvpx */
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(LOAD_VSX, 0, 32);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   break;
+
case 364:   /* lxvwsx */
op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2410,6 +2421,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
VSX_CHECK_VEC;
break;
}
+   case 461:   /* stxvpx */
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(STORE_VSX, 0, 32);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   break;
case 524:   /* lxsspx */
op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2651,6 +2668,21 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 #endif
 
 #ifdef CONFIG_VSX
+   case 6:
+   op->ea = dqform_ea(word, regs);
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   switch (word & 0xf) {
+   case 0: /* lxvp */
+   op->type = MKOP(LOAD_VSX, 0, 32);
+   break;
+   case 1: /* stxvp */
+   op->type = MKOP(STORE_VSX, 0, 32);
+   break;
+   }
+   break;
+
case 61:/* stfdp, lxv, stxsd, stxssp, stxv */
switch (word & 7) {
case 0: /* stfdp with LSB of DS field = 0 */
@@ -2779,12 +2811,24 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 57:/* pld */
op->type = MKOP(LOAD, PREFIXED, 8);
break;
+   case 58:/* plxvp */
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   break;
case 60:/* stq */
op->type = MKOP(STORE, PREFIXED, 16);
break;
case 61:/* pstd */
op->type = MKOP(STORE, PREFIXED, 8);
break;
+   case 62:/* pstxvp */
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(STORE_VSX, PREFIXED, 32);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   break;
}
break;
case 1: /* Type 01 Eight-Byte Register-to-Register */

base-commit: 6469e8962c20b580b471790fe42367750599
-- 
2.24.1
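
(Aside: VSX_REGISTER_XTP() above turns the raw 5-bit register field into the
first VSR of the pair. The field's least significant bit is the TX bit
selecting VSRs 32-63; the remaining bits give an even register number, so a
pair never straddles an odd boundary. An illustrative user-space rendering of
the mapping, not part of the patch:

#include <stdio.h>

#define VSX_REGISTER_XTP(rd)	((((rd) & 1) << 5) | ((rd) & 0xfe))

int main(void)
{
	int rd;

	/* e.g. field 0 -> VSRs 0,1; field 1 -> VSRs 32,33; field 31 -> 62,63 */
	for (rd = 0; rd < 32; rd++)
		printf("field %2d -> VSR pair %2d,%2d\n", rd,
		       VSX_REGISTER_XTP(rd), VSX_REGISTER_XTP(rd) + 1);
	return 0;
})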



Re: [PATCH 3/3] powerpc/pseries: Add KVM guest doorbell restrictions

2020-06-29 Thread Nicholas Piggin
Excerpts from Paul Mackerras's message of June 30, 2020 12:27 pm:
> On Sun, Jun 28, 2020 at 01:04:28AM +1000, Nicholas Piggin wrote:
>> KVM guests have certain restrictions and performance quirks when
>> using doorbells. This patch tests for KVM environment in doorbell
>> setup, and optimises IPI performance:
>> 
>>  - PowerVM guests may now use doorbells even if they are secure.
>> 
>>  - KVM guests no longer use doorbells if XIVE is available.
> 
> It seems, from the fact that you completely remove
> kvm_para_available(), that you perhaps haven't tried building with
> CONFIG_KVM_GUEST=y.

It's still there and builds:

static inline int kvm_para_available(void)
{
return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
}

but...

> Somewhat confusingly, that option is not used or
> needed when building for a PAPR guest (i.e. the "pseries" platform)
> but is used on non-IBM platforms using the "epapr" hypervisor
> interface.

... is_kvm_guest() returns false on !PSERIES now. Not intended
to break EPAPR. I'm not sure of a good way to share this between
EPAPR and PSERIES, I might just make a copy of it but I'll see.

Thanks,
Nick


Re: [PATCH] ASoC: fsl_asrc: Add an option to select internal ratio mode

2020-06-29 Thread Shengjiu Wang
On Tue, Jun 30, 2020 at 4:09 AM Nicolin Chen  wrote:
>
> On Mon, Jun 29, 2020 at 09:58:35PM +0800, Shengjiu Wang wrote:
> > The ASRC not only supports ideal ratio mode, but also supports
> > internal ratio mode.
> >
> > For internal ratio mode, the rate of the clock source should be divided
> > with no remainder by sample rate, otherwise there is sound
> > distortion.
> >
> > Add function fsl_asrc_select_clk() to find proper clock source for
> > internal ratio mode, if the clock source is available then internal
> > ratio mode will be selected.
> >
> > With this change, the ideal ratio mode is not the only option for the user.
> >
> > Signed-off-by: Shengjiu Wang 
> > ---
>
> > +static int fsl_asrc_select_clk(struct fsl_asrc_priv *asrc_priv,
> > +struct fsl_asrc_pair *pair,
> > +int in_rate,
> > +int out_rate)
> > +{
> > + struct fsl_asrc_pair_priv *pair_priv = pair->private;
> > + struct asrc_config *config = pair_priv->config;
> > + int rate[2], select_clk[2]; /* Array size 2 means IN and OUT */
> > + int clk_rate, clk_index;
> > + int i = 0, j = 0;
> > + bool clk_sel[2];
> > +
> > + rate[0] = in_rate;
> > + rate[1] = out_rate;
> > +
> > + /* Select proper clock source for internal ratio mode */
> > + for (j = 0; j < 2; j++) {
> > + for (i = 0; i < ASRC_CLK_MAP_LEN; i++) {
> > + clk_index = asrc_priv->clk_map[j][i];
> > + clk_rate = clk_get_rate(asrc_priv->asrck_clk[clk_index]);
>
> +   /* Only match a perfect clock source with no remainder */
>
> > + if (clk_rate != 0 && (clk_rate / rate[j]) <= 1024 &&
> > + (clk_rate % rate[j]) == 0)
> > + break;
> > + }
> > +
> > + if (i == ASRC_CLK_MAP_LEN) {
> > + select_clk[j] = OUTCLK_ASRCK1_CLK;
> > + clk_sel[j] = false;
> > + } else {
> > + select_clk[j] = i;
> > + clk_sel[j] = true;
> > + }
> > + }
> > +
> > + /* Switch to ideal ratio mode if there is no proper clock source */
> > + if (!clk_sel[IN] || !clk_sel[OUT])
> > + select_clk[IN] = INCLK_NONE;
>
> Could get rid of clk_sel:
>
> for (j) {
> for (i) {
> if (match)
> break;
> }
>
> clk[j] = i;
> }
>
> if (clk[IN] == ASRC_CLK_MAP_LEN || clk[OUT] == ASRC_CLK_MAP_LEN)
>
> And it only overrides clk[IN] setting but leaving clk[OUT] to
> to the searching result. This means that clk[OUT] may be using
> a clock source other than OUTCLK_ASRCK1_CLK if sel[IN] happens
> to be false while sel[OUT] happens to be true. Not sure if it
> is intended...but I feel it would probably be safer to use the
> previous settings: INCLK_NONE + OUTCLK_ASRCK1_CLK?

ok, will update the patch.

best regards
wang shengjiu
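
(For reference, a user-space model of the loop shape Nicolin suggests,
including the INCLK_NONE + OUTCLK_ASRCK1_CLK fallback when either direction
lacks a perfect divider. All names, sizes and rates below are illustrative
stand-ins, not the driver's actual tables:

#include <stdio.h>

#define CLK_MAP_LEN		4
#define INCLK_NONE		(-1)
#define OUTCLK_ASRCK1_CLK	0

int main(void)
{
	/* candidate clock rates per direction; 0 = clock not available */
	long clk_rate[2][CLK_MAP_LEN] = {
		{ 24576000, 22579200, 512000, 0 },	/* IN */
		{ 24576000, 22579200, 512000, 0 },	/* OUT */
	};
	long rate[2] = { 48000, 44100 };		/* in_rate, out_rate */
	int clk[2], i, j;

	for (j = 0; j < 2; j++) {
		for (i = 0; i < CLK_MAP_LEN; i++)
			/* only match a perfect clock source with no remainder */
			if (clk_rate[j][i] != 0 &&
			    clk_rate[j][i] / rate[j] <= 1024 &&
			    clk_rate[j][i] % rate[j] == 0)
				break;
		clk[j] = i;
	}

	/* no perfect source on either side: fall back to ideal ratio mode */
	if (clk[0] == CLK_MAP_LEN || clk[1] == CLK_MAP_LEN) {
		clk[0] = INCLK_NONE;
		clk[1] = OUTCLK_ASRCK1_CLK;
	}
	printf("in clk %d, out clk %d\n", clk[0], clk[1]);
	return 0;
})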


Re: [PATCH v2 3/3] powerpc/mm/book3s64/radix: Off-load TLB invalidations to host when !GTSE

2020-06-29 Thread Aneesh Kumar K.V
Bharata B Rao  writes:

> From: Nicholas Piggin 
>
> When platform doesn't support GTSE, let TLB invalidation requests
> for radix guests be off-loaded to the host using H_RPT_INVALIDATE
> hcall.
>

Reviewed-by: Aneesh Kumar K.V 

> Signed-off-by: Nicholas Piggin 
> Signed-off-by: Bharata B Rao 
>   [hcall wrapper, error path handling and renames]
> ---
>  .../include/asm/book3s/64/tlbflush-radix.h| 15 
>  arch/powerpc/include/asm/hvcall.h | 34 +++-
>  arch/powerpc/include/asm/plpar_wrappers.h | 50 +++
>  arch/powerpc/mm/book3s64/radix_tlb.c  | 82 +--
>  4 files changed, 173 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> index ca8db193ae38..e7cf50358411 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> @@ -2,10 +2,25 @@
>  #ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
>  #define _ASM_POWERPC_TLBFLUSH_RADIX_H
>  
> +#include <asm/hvcall.h>
> +
>  struct vm_area_struct;
>  struct mm_struct;
>  struct mmu_gather;
>  
> +static inline u64 psize_to_h_rpti(unsigned long psize)
> +{
> + if (psize == MMU_PAGE_4K)
> + return H_RPTI_PAGE_4K;
> + if (psize == MMU_PAGE_64K)
> + return H_RPTI_PAGE_64K;
> + if (psize == MMU_PAGE_2M)
> + return H_RPTI_PAGE_2M;
> + if (psize == MMU_PAGE_1G)
> + return H_RPTI_PAGE_1G;
> + return H_RPTI_PAGE_ALL;
> +}
> +
>  static inline int mmu_get_ap(int psize)
>  {
>   return mmu_psize_defs[psize].ap;
> diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
> index e90c073e437e..43486e773bd6 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -305,7 +305,8 @@
>  #define H_SCM_UNBIND_ALL 0x3FC
>  #define H_SCM_HEALTH 0x400
>  #define H_SCM_PERFORMANCE_STATS 0x418
> -#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS
> +#define H_RPT_INVALIDATE 0x448
> +#define MAX_HCALL_OPCODE H_RPT_INVALIDATE
>  
>  /* Scope args for H_SCM_UNBIND_ALL */
>  #define H_UNBIND_SCOPE_ALL (0x1)
> @@ -389,6 +390,37 @@
>  #define PROC_TABLE_RADIX 0x04
>  #define PROC_TABLE_GTSE  0x01
>  
> +/*
> + * Defines for
> + * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information.
> + */
> +
> +/* Type of translation to invalidate (type) */
> +#define H_RPTI_TYPE_NESTED   0x0001  /* Invalidate nested guest partition-scope */
> +#define H_RPTI_TYPE_TLB  0x0002  /* Invalidate TLB */
> +#define H_RPTI_TYPE_PWC  0x0004  /* Invalidate Page Walk Cache */
> +/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
> +#define H_RPTI_TYPE_PRT  0x0008
> +/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
> +#define H_RPTI_TYPE_PAT  0x0008
> +#define H_RPTI_TYPE_ALL  (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
> +  H_RPTI_TYPE_PRT)
> +#define H_RPTI_TYPE_NESTED_ALL   (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
> +  H_RPTI_TYPE_PAT)
> +
> +/* Invalidation targets (target) */
> +#define H_RPTI_TARGET_CMMU   0x01 /* All virtual processors in the partition */
> +#define H_RPTI_TARGET_CMMU_LOCAL 0x02 /* Current virtual processor */
> +/* All nest/accelerator agents in use by the partition */
> +#define H_RPTI_TARGET_NMMU   0x04
> +
> +/* Page size mask (page sizes) */
> +#define H_RPTI_PAGE_4K   0x01
> +#define H_RPTI_PAGE_64K  0x02
> +#define H_RPTI_PAGE_2M   0x04
> +#define H_RPTI_PAGE_1G   0x08
> +#define H_RPTI_PAGE_ALL (-1UL)
> +
>  #ifndef __ASSEMBLY__
>  #include <linux/types.h>
>  
> diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
> index 4497c8afb573..a184923abd07 100644
> --- a/arch/powerpc/include/asm/plpar_wrappers.h
> +++ b/arch/powerpc/include/asm/plpar_wrappers.h
> @@ -334,6 +334,49 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
>   return rc;
>  }
>  
> +/*
> + * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately
> + *
> + * - Returns H_SUCCESS on success
> + * - For H_BUSY return value, we retry the hcall.
> + * - For any other hcall failures, attempt a full flush once before
> + *   resorting to BUG().
> + *
> + * Note: This hcall is expected to fail only very rarely. The correct
> + * error recovery of killing the process/guest will be eventually
> + * needed.
> + */
> +static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type,
> +   u64 page_sizes, u64 start, u64 end)
> +{
> + long rc;
> + unsigned long all;
> +
> + while (true) {
> + rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type,
> +                                 page_sizes, start, end);
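
(The quoted hunk is truncated here by the archive. Going by the comment
above, the loop retries on H_BUSY and attempts one full flush before BUG()
on any other failure. A user-space model of just that retry policy, with a
stubbed hcall; none of this is the actual kernel code:

#include <stdio.h>
#include <stdlib.h>

#define H_SUCCESS	0
#define H_BUSY		1

static int attempts;

/* stand-in for plpar_hcall_norets(): pretend busy twice, then succeed */
static long fake_hcall(void)
{
	return (++attempts < 3) ? H_BUSY : H_SUCCESS;
}

static long rpt_invalidate(void)
{
	int full_flush_tried = 0;
	long rc;

	for (;;) {
		rc = fake_hcall();
		if (rc == H_BUSY)
			continue;		/* transient, retry the hcall */
		if (rc == H_SUCCESS)
			return rc;
		if (full_flush_tried)
			abort();		/* stands in for BUG() */
		full_flush_tried = 1;		/* retry once as a full flush */
	}
}

int main(void)
{
	long rc = rpt_invalidate();

	printf("rc=%ld after %d hcall attempts\n", rc, attempts);
	return 0;
})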

Re: [PATCH v2 2/3] powerpc/pseries: H_REGISTER_PROC_TBL should ask for GTSE only if enabled

2020-06-29 Thread Aneesh Kumar K.V
Bharata B Rao  writes:

> H_REGISTER_PROC_TBL asks for GTSE by default. GTSE flag bit should
> be set only when GTSE is supported.
>
Reviewed-by: Aneesh Kumar K.V 

> Signed-off-by: Bharata B Rao 
> ---
>  arch/powerpc/platforms/pseries/lpar.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index fd26f3d21d7b..f82569a505f1 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -1680,9 +1680,11 @@ static int pseries_lpar_register_process_table(unsigned long base,
>  
>   if (table_size)
>   flags |= PROC_TABLE_NEW;
> - if (radix_enabled())
> - flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
> - else
> + if (radix_enabled()) {
> + flags |= PROC_TABLE_RADIX;
> + if (mmu_has_feature(MMU_FTR_GTSE))
> + flags |= PROC_TABLE_GTSE;
> + } else
>   flags |= PROC_TABLE_HPT_SLB;
>   for (;;) {
>   rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
> -- 
> 2.21.3


Re: [PATCH v2 1/3] powerpc/mm: Enable radix GTSE only if supported.

2020-06-29 Thread Aneesh Kumar K.V
Bharata B Rao  writes:

> Make GTSE an MMU feature and enable it by default for radix.
> However for guest, conditionally enable it if hypervisor supports
> it via OV5 vector. Let prom_init ask for radix GTSE only if the
> support exists.
>
> Having GTSE as an MMU feature will make it easy to enable radix
> without GTSE. Currently radix assumes GTSE is enabled by default.
>

Reviewed-by: Aneesh Kumar K.V 

> Signed-off-by: Bharata B Rao 
> ---
>  arch/powerpc/include/asm/mmu.h|  4 
>  arch/powerpc/kernel/dt_cpu_ftrs.c |  1 +
>  arch/powerpc/kernel/prom_init.c   | 13 -
>  arch/powerpc/mm/init_64.c |  5 -
>  4 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
> index f4ac25d4df05..884d51995934 100644
> --- a/arch/powerpc/include/asm/mmu.h
> +++ b/arch/powerpc/include/asm/mmu.h
> @@ -28,6 +28,9 @@
>   * Individual features below.
>   */
>  
> +/* Guest Translation Shootdown Enable */
> +#define MMU_FTR_GTSE ASM_CONST(0x1000)
> +
>  /*
>   * Support for 68 bit VA space. We added that from ISA 2.05
>   */
> @@ -173,6 +176,7 @@ enum {
>  #endif
>  #ifdef CONFIG_PPC_RADIX_MMU
>   MMU_FTR_TYPE_RADIX |
> + MMU_FTR_GTSE |
>  #ifdef CONFIG_PPC_KUAP
>   MMU_FTR_RADIX_KUAP |
>  #endif /* CONFIG_PPC_KUAP */
> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
> index a0edeb391e3e..ac650c233cd9 100644
> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> @@ -336,6 +336,7 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
>  #ifdef CONFIG_PPC_RADIX_MMU
>   cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
>   cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
> + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
>   cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
>  
>   return 1;
> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> index 90c604d00b7d..cbc605cfdec0 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -1336,12 +1336,15 @@ static void __init prom_check_platform_support(void)
>   }
>   }
>  
> - if (supported.radix_mmu && supported.radix_gtse &&
> - IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
> - /* Radix preferred - but we require GTSE for now */
> - prom_debug("Asking for radix with GTSE\n");
> + if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
> + /* Radix preferred - Check if GTSE is also supported */
> + prom_debug("Asking for radix\n");
>   ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
> - ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
> + if (supported.radix_gtse)
> + ibm_architecture_vec.vec5.radix_ext =
> + OV5_FEAT(OV5_RADIX_GTSE);
> + else
> + prom_debug("Radix GTSE isn't supported\n");
>   } else if (supported.hash_mmu) {
>   /* Default to hash mmu (if we can) */
>   prom_debug("Asking for hash\n");
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index bc73abf0bc25..152aa0200cef 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -407,12 +407,15 @@ static void __init early_check_vec5(void)
>   if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
>   OV5_FEAT(OV5_RADIX_GTSE))) {
> pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
> - }
> + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
> + } else
> + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
>   /* Do radix anyway - the hypervisor said we had to */
>   cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
>   } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
>   /* Hypervisor only supports hash - disable radix */
>   cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
>   }
>  }
>  
> -- 
> 2.21.3


Re: [PATCH v6 7/8] powerpc/pmem: Add WARN_ONCE to catch the wrong usage of pmem flush functions.

2020-06-29 Thread Aneesh Kumar K.V
Dan Williams  writes:

> On Mon, Jun 29, 2020 at 6:58 AM Aneesh Kumar K.V
>  wrote:
>>
>> We only support persistent memory on P8 and above. This is enforced by the
>> firmware and further checked on virtualized platforms during platform init.
>> Add WARN_ONCE in pmem flush routines to catch the wrong usage of these.
>>
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>  arch/powerpc/include/asm/cacheflush.h | 2 ++
>>  arch/powerpc/lib/pmem.c   | 2 ++
>>  2 files changed, 4 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
>> index 95782f77d768..1ab0fa660497 100644
>> --- a/arch/powerpc/include/asm/cacheflush.h
>> +++ b/arch/powerpc/include/asm/cacheflush.h
>> @@ -103,6 +103,8 @@ static inline void  arch_pmem_flush_barrier(void)
>>  {
>> if (cpu_has_feature(CPU_FTR_ARCH_207S))
>> asm volatile(PPC_PHWSYNC ::: "memory");
>> +   else
>> +   WARN_ONCE(1, "Using pmem flush on older hardware.");
>
> This seems too late to be making this determination. I'd expect the
> driver to fail to successfully bind default if this constraint is not
> met.

We do that in Patch 1.

-aneesh


Re: [PATCH v6 5/8] powerpc/pmem/of_pmem: Update of_pmem to use the new barrier instruction.

2020-06-29 Thread Aneesh Kumar K.V
Dan Williams  writes:

> On Mon, Jun 29, 2020 at 6:58 AM Aneesh Kumar K.V
>  wrote:
>>
>> of_pmem on POWER10 can now use phwsync instead of hwsync to ensure
>> all previous writes are architecturally visible for the platform
>> buffer flush.
>>
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>  arch/powerpc/include/asm/cacheflush.h | 7 +++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
>> index 54764c6e922d..95782f77d768 100644
>> --- a/arch/powerpc/include/asm/cacheflush.h
>> +++ b/arch/powerpc/include/asm/cacheflush.h
>> @@ -98,6 +98,13 @@ static inline void invalidate_dcache_range(unsigned long start,
>> mb();   /* sync */
>>  }
>>
>> +#define arch_pmem_flush_barrier arch_pmem_flush_barrier
>> +static inline void  arch_pmem_flush_barrier(void)
>> +{
>> +   if (cpu_has_feature(CPU_FTR_ARCH_207S))
>> +   asm volatile(PPC_PHWSYNC ::: "memory");
>
> Shouldn't this fallback to a compatible store-fence in an else statement?

The idea was to avoid calling this on anything else. We ensure that by
making sure that pmem devices are not initialized on systems without that
cpu feature. Patch 1 does that. Also, the last patch adds a WARN_ON() to
catch the usage of this outside pmem devices and on systems without that
cpu feature.

-aneesh


Re: [PATCH updated] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier

2020-06-29 Thread Aneesh Kumar K.V
Dan Williams  writes:

> On Mon, Jun 29, 2020 at 1:29 PM Aneesh Kumar K.V
>  wrote:
>>
>> Architectures like ppc64 provide persistent memory specific barriers
>> that will ensure that all stores for which the modifications are
>> written to persistent storage by preceding dcbfps and dcbstps
>> instructions have updated persistent storage before any data
>> access or data transfer caused by subsequent instructions is initiated.
>> This is in addition to the ordering done by wmb()
>>
>> Update nvdimm core such that architecture can use barriers other than
>> wmb to ensure all previous writes are architecturally visible for
>> the platform buffer flush.
>>
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>  drivers/md/dm-writecache.c   | 2 +-
>>  drivers/nvdimm/region_devs.c | 8 
>>  include/linux/libnvdimm.h| 4 
>>  3 files changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
>> index 74f3c506f084..8c6b6dce64e2 100644
>> --- a/drivers/md/dm-writecache.c
>> +++ b/drivers/md/dm-writecache.c
>> @@ -536,7 +536,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
>>  static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
>>  {
>> if (WC_MODE_PMEM(wc))
>> -   wmb();
>> +   arch_pmem_flush_barrier();
>> else
>> ssd_commit_flushed(wc, wait_for_ios);
>>  }
>> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
>> index 4502f9c4708d..b308ad09b63d 100644
>> --- a/drivers/nvdimm/region_devs.c
>> +++ b/drivers/nvdimm/region_devs.c
>> @@ -1206,13 +1206,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
>> idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
>>
>> /*
>> -* The first wmb() is needed to 'sfence' all previous writes
>> -* such that they are architecturally visible for the platform
>> -* buffer flush.  Note that we've already arranged for pmem
>> +* The first arch_pmem_flush_barrier() is needed to 'sfence' all
>> +* previous writes such that they are architecturally visible for
>> +* the platform buffer flush. Note that we've already arranged for pmem
>>  * writes to avoid the cache via memcpy_flushcache().  The final
>>  * wmb() ensures ordering for the NVDIMM flush write.
>>  */
>> -   wmb();
>> +   arch_pmem_flush_barrier();
>> for (i = 0; i < nd_region->ndr_mappings; i++)
>> if (ndrd_get_flush_wpq(ndrd, i, 0))
>> writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
>> diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
>> index 18da4059be09..66f6c65bd789 100644
>> --- a/include/linux/libnvdimm.h
>> +++ b/include/linux/libnvdimm.h
>> @@ -286,4 +286,8 @@ static inline void arch_invalidate_pmem(void *addr, size_t size)
>>  }
>>  #endif
>>
>> +#ifndef arch_pmem_flush_barrier
>> +#define arch_pmem_flush_barrier() wmb()
>> +#endif
>
> I think it is out of place to define this in libnvdimm.h and it is odd
> to give it such a long name. The other pmem api helpers like
> arch_wb_cache_pmem() and arch_invalidate_pmem() are function calls for
> libnvdimm driver operations, this barrier is just an instruction and
> is closer to wmb() than the pmem api routine.
>
> Since it is a store fence for pmem, so let's just call it pmem_wmb()
> and define the generic version in include/linux/compiler.h. It should
> probably also be documented alongside dma_wmb() in
> Documentation/memory-barriers.txt about why code would use it over
> wmb(), and why a symmetric pmem_rmb() is not needed.

How about the below? I used pmem_barrier() instead of pmem_wmb(). I
guess we wanted this to order any data access, not just the following
stores to persistent storage? W.r.t. why a symmetric pmem_rmb() is not
needed, I was not sure how to explain that. Are you suggesting we explain
why a read/load from persistent storage doesn't need to wait for
pmem_barrier()?

modified   Documentation/memory-barriers.txt
@@ -1935,6 +1935,16 @@ There are some more advanced barrier functions:
  relaxed I/O accessors and the Documentation/DMA-API.txt file for more
  information on consistent memory.
 
+ (*) pmem_barrier();
+
+ These are for use with persistent memory to ensure the ordering of stores
+ to a persistent memory region.
+
+ For example, after a non-temporal write to persistent storage we use
+ pmem_barrier() to ensure that stores have updated the persistent storage
+ before any data access or data transfer caused by subsequent instructions
+ is initiated.
+
 
 ===
 IMPLICIT KERNEL MEMORY BARRIERS
modified   arch/powerpc/include/asm/barrier.h
@@ -97,6 +97,19 @@ do { \
 #define barrier_nospec()
 #endif /* CONFIG_PPC_BARRIER_NOSPEC */
 
+/*
+ * pmem_barrier() 
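
(Aneesh's draft is truncated here by the archive. For comparison, Dan's
pmem_wmb() suggestion would presumably reduce to a one-line generic fallback
that architectures override; a compilable user-space sketch with stand-ins
for the kernel primitives, purely illustrative:

#include <stdio.h>

/* user-space stand-in for the kernel's wmb() */
#define wmb()		__asm__ __volatile__("" ::: "memory")

/* generic fallback; an arch header would define its own pmem_wmb() first */
#ifndef pmem_wmb
#define pmem_wmb()	wmb()
#endif

int main(void)
{
	static int data, flag;

	data = 42;
	pmem_wmb();	/* order the data store before publishing the flag */
	flag = 1;
	printf("data=%d flag=%d\n", data, flag);
	return 0;
})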

Re: [PATCH v5 3/3] mm/page_alloc: Keep memoryless cpuless node 0 offline

2020-06-29 Thread Srikar Dronamraju
* Christopher Lameter  [2020-06-29 14:58:40]:

> On Wed, 24 Jun 2020, Srikar Dronamraju wrote:
> 
> > Currently Linux kernel with CONFIG_NUMA on a system with multiple
> > possible nodes, marks node 0 as online at boot.  However in practice,
> > there are systems which have node 0 as memoryless and cpuless.
> 
> Maybe add something to explain why you are not simply mapping the
> existing memory to NUMA node 0 which is after all just a numbering scheme
> used by the kernel and can be used arbitrarily?
> 

I thought Michal Hocko already gave a clear picture on why mapping is a bad
idea. https://lore.kernel.org/lkml/20200316085425.gb11...@dhcp22.suse.cz/t/#u
Are you suggesting that we add that as part of the changelog?

> This could be seen more as a bug in the arch code during the setup of NUMA
> nodes. The two nodes are created by the firmwware / bootstrap code after
> all. Just do not do it?
> 

- The arch/setup code in powerpc is not onlining these nodes. 
- Later on cpus/memory in node 0 can be onlined.
- Firmware in this case Phyp is an independent code by itself.

-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH V3 0/4] mm/debug_vm_pgtable: Add some more tests

2020-06-29 Thread Anshuman Khandual



On 06/24/2020 08:43 AM, Anshuman Khandual wrote:
> 
> 
> On 06/15/2020 09:07 AM, Anshuman Khandual wrote:
>> This series adds some more arch page table helper validation tests which
>> are related to core and advanced memory functions. This also creates a
>> documentation, enlisting expected semantics for all page table helpers as
>> suggested by Mike Rapoport previously (https://lkml.org/lkml/2020/1/30/40).
>>
>> There are many TRANSPARENT_HUGEPAGE and ARCH_HAS_TRANSPARENT_HUGEPAGE_PUD
>> ifdefs scattered across the test. But consolidating all the fallback stubs
>> is not very straight forward because ARCH_HAS_TRANSPARENT_HUGEPAGE_PUD is
>> not explicitly dependent on ARCH_HAS_TRANSPARENT_HUGEPAGE.
>>
>> Tested on arm64, x86 platforms but only build tested on all other enabled
>> platforms through ARCH_HAS_DEBUG_VM_PGTABLE i.e powerpc, arc, s390. The
>> following failure on arm64 still exists which was mentioned previously. It
>> will be fixed with the upcoming THP migration on arm64 enablement series.
>>
>> WARNING  mm/debug_vm_pgtable.c:860 debug_vm_pgtable+0x940/0xa54
>> WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd
>>
>> This series is based on v5.8-rc1.
>>
>> Changes in V3:
>>
>> - Replaced HAVE_ARCH_SOFT_DIRTY with MEM_SOFT_DIRTY
>> - Added HAVE_ARCH_HUGE_VMAP checks in pxx_huge_tests() per Gerald
>> - Updated documentation for pmd_thp_tests() per Zi Yan
>> - Replaced READ_ONCE() with huge_ptep_get() per Gerald
>> - Added pte_mkhuge() and masking with PMD_MASK per Gerald
>> - Replaced pte_same() with holding pfn check in pxx_swap_tests()
>> - Added documentation for all (#ifdef #else #endif) per Gerald
>> - Updated pmd_protnone_tests() per Gerald
>> - Updated HugeTLB PTE creation in hugetlb_advanced_tests() per Gerald
>> - Replaced [pmd|pud]_mknotpresent() with [pmd|pud]_mkinvalid()
>> - Added has_transparent_hugepage() check for PMD and PUD tests
>> - Added a patch which debug prints all individual tests being executed
>> - Updated documentation for renamed [pmd|pud]_mkinvalid() helpers
> 
> Hello Gerald/Christophe/Vineet,
> 
> It would be really great if you could give this series a quick test
> on s390/ppc/arc platforms respectively. Thank you.

Thanks Alexander, Gerald and Christophe for testing this out on s390
and ppc32 platforms. Probably Vineet and Qian (any other volunteers)
could help us with arc and ppc64 platforms, which I would appreciate.


Re: [PATCH 04/11] ppc64/kexec_file: avoid stomping memory used by special regions

2020-06-29 Thread piliu



On 06/29/2020 01:55 PM, Hari Bathini wrote:
> 
> 
> On 28/06/20 7:44 am, piliu wrote:
>> Hi Hari,
> 
> Hi Pingfan,
> 
>>
>> After a quick through for this series, I have a few question/comment on
>> this patch for the time being. Pls see comment inline.
>>
>> On 06/27/2020 03:05 AM, Hari Bathini wrote:
>>> crashkernel region could have an overlap with special memory regions
>>> like  opal, rtas, tce-table & such. These regions are referred to as
>>> exclude memory ranges. Setup this ranges during image probe in order
>>> to avoid them while finding the buffer for different kdump segments.
> 
> [...]
> 
>>> +   /*
>>> +* Use the locate_mem_hole logic in kexec_add_buffer() for regular
>>> +* kexec_file_load syscall
>>> +*/
>>> +   if (kbuf->image->type != KEXEC_TYPE_CRASH)
>>> +   return 0;
>> Can the ranges overlap [crashk_res.start, crashk_res.end]?  Otherwise
>> there is no requirement for @exclude_ranges.
> 
> The ranges like rtas, opal are loaded by f/w. They almost always overlap with
> crashkernel region. So, @exclude_ranges is required to support kdump.
The f/w passes rtas/opal as services, so must the f/w mark these ranges as
fdt_reserved_mem in order to make the kernel aware not to use these ranges?
Otherwise kernel memory allocation besides kdump can also overwrite
these ranges.

Hmm, revisiting reserve_crashkernel(). It seems not to take any reserved
memory into consideration except the kernel text. Could it work based on the
memblock allocator?

Thanks,
Pingfan
> 
>> I guess you have a design for future. If not true, then it is better to
>> fold the condition "if (kbuf->image->type != KEXEC_TYPE_CRASH)" into the
>> caller and rename this function to better distinguish use cases between
>> kexec and kdump
> 
> Yeah, this condition will be folded. I have a follow-up patch for that 
> explaining
> why kexec case should also be folded. Will try to add that to this series for 
> v2.
> 
> Thanks
> Hari
> 



Re: [PATCH] xmon: Reset RCU and soft lockup watchdogs

2020-06-29 Thread Nicholas Piggin
Excerpts from Anton Blanchard's message of June 30, 2020 10:02 am:
> I'm seeing RCU warnings when exiting xmon. xmon resets the NMI watchdog,
> but does nothing with the RCU stall or soft lockup watchdogs. Add a
> helper function that handles all three.
> 
> Signed-off-by: Anton Blanchard 

Acked-by: Nicholas Piggin 

> ---
>  arch/powerpc/xmon/xmon.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index 7efe4bc3ccf6..d27944e38b04 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -481,6 +481,13 @@ static inline int unrecoverable_excp(struct pt_regs 
> *regs)
>  #endif
>  }
>  
> +static void xmon_touch_watchdogs(void)
> +{
> + touch_softlockup_watchdog_sync();
> + rcu_cpu_stall_reset();
> + touch_nmi_watchdog();
> +}
> +
>  static int xmon_core(struct pt_regs *regs, int fromipi)
>  {
>   int cmd = 0;
> @@ -718,7 +725,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
>   else
>   insert_cpu_bpts();
>  
> - touch_nmi_watchdog();
> + xmon_touch_watchdogs();
>   local_irq_restore(flags);
>  
>   return cmd != 'X' && cmd != EOF;
> -- 
> 2.26.2
> 
> 


Re: [PATCH 3/3] powerpc/pseries: Add KVM guest doorbell restrictions

2020-06-29 Thread Paul Mackerras
On Sun, Jun 28, 2020 at 01:04:28AM +1000, Nicholas Piggin wrote:
> KVM guests have certain restrictions and performance quirks when
> using doorbells. This patch tests for KVM environment in doorbell
> setup, and optimises IPI performance:
> 
>  - PowerVM guests may now use doorbells even if they are secure.
> 
>  - KVM guests no longer use doorbells if XIVE is available.

It seems, from the fact that you completely remove
kvm_para_available(), that you perhaps haven't tried building with
CONFIG_KVM_GUEST=y.  Somewhat confusingly, that option is not used or
needed when building for a PAPR guest (i.e. the "pseries" platform)
but is used on non-IBM platforms using the "epapr" hypervisor
interface.

If you did intend to remove support for the epapr hypervisor interface
then that should have been talked about in the commit message (and
would I expect be controversial).

So NAK on the kvm_para_available() removal.

Paul.


Re: [PATCH] kbuild: introduce ccflags-remove-y and asflags-remove-y

2020-06-29 Thread Masahiro Yamada
On Mon, Jun 29, 2020 at 2:55 PM Michael Ellerman  wrote:
>
> Masahiro Yamada  writes:
> > CFLAGS_REMOVE_.o works per object, that is, there is no
> > convenient way to filter out flags for every object in a directory.
> >
> > Add ccflags-remove-y and asflags-remove-y to make it easy.
> >
> > Use ccflags-remove-y to clean up some Makefiles.
> >
> > Suggested-by: Sami Tolvanen 
> > Signed-off-by: Masahiro Yamada 
> > ---
> >
> >  arch/arm/boot/compressed/Makefile | 6 +-
> >  arch/powerpc/xmon/Makefile| 3 +--
> >  arch/sh/boot/compressed/Makefile  | 5 +
> >  kernel/trace/Makefile | 4 ++--
> >  lib/Makefile  | 5 +
> >  scripts/Makefile.lib  | 4 ++--
> >  6 files changed, 8 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
> > index 89c76ca35640..55cbcdd88ac0 100644
> > --- a/arch/powerpc/xmon/Makefile
> > +++ b/arch/powerpc/xmon/Makefile
> > @@ -7,8 +7,7 @@ UBSAN_SANITIZE := n
> >  KASAN_SANITIZE := n
> >
> >  # Disable ftrace for the entire directory
> > -ORIG_CFLAGS := $(KBUILD_CFLAGS)
> > -KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
> > +ccflags-remove-y += $(CC_FLAGS_FTRACE)
>
> This could be:
>
> ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
>
> Similar to kernel/trace/Makefile below.


I fixed it up, and applied to linux-kbuild.
Thanks.


> I don't mind though.
>
> Acked-by: Michael Ellerman  (powerpc)
>
> cheers
>
> > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> > index 6575bb0a0434..7492844a8b1b 100644
> > --- a/kernel/trace/Makefile
> > +++ b/kernel/trace/Makefile
> > @@ -2,9 +2,9 @@
> >
> >  # Do not instrument the tracer itself:
> >
> > +ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
> > +
> >  ifdef CONFIG_FUNCTION_TRACER
> > -ORIG_CFLAGS := $(KBUILD_CFLAGS)
> > -KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
> >
> >  # Avoid recursion due to instrumentation.
> >  KCSAN_SANITIZE := n



-- 
Best Regards
Masahiro Yamada


[PATCH v2] powerpc: Warn about use of smt_snooze_delay

2020-06-29 Thread Joel Stanley
It's not done anything for a long time. Save the percpu variable, and
emit a warning to remind users to not expect it to do anything.

Fixes: 3fa8cad82b94 ("powerpc/pseries/cpuidle: smt-snooze-delay cleanup.")
Cc: sta...@vger.kernel.org # v3.14
Signed-off-by: Joel Stanley 
--
v2:
 Use pr_warn instead of WARN
 Reword and print process name with pid in message
 Leave CPU_FTR_SMT test in
 Add Fixes line

mpe, if you don't agree then feel free to drop the cc stable.

Testing 'ppc64_cpu --smt=off' on a 24 core / 4 SMT system, it's quite noisy
as the online/offline loop that ppc64_cpu runs is slow.

This could be fixed by open coding pr_warn_ratelimit with the ratelimit
parameters tweaked if someone was concerned. I'll leave that to someone
else as a future enhancement.

[  237.642088][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642175][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642261][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642345][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642430][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642516][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642625][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642709][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642793][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642878][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264030][ T1197] store_smt_snooze_delay: 14 callbacks suppressed
[  254.264033][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264048][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264062][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264075][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264089][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264103][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264116][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264130][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264143][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264157][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect

Signed-off-by: Joel Stanley 
---
 arch/powerpc/kernel/sysfs.c | 41 +++--
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 571b3259697e..ba6d4cee19ef 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -32,29 +32,26 @@
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
 #ifdef CONFIG_PPC64
 
-/* Time in microseconds we delay before sleeping in the idle loop */
-static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ *  up the kernel code."
+ *
+ * At some point in the future this code should be removed.
+ */
 
 static ssize_t store_smt_snooze_delay(struct device *dev,
  struct device_attribute *attr,
  const char *buf,
  size_t count)
 {
-   struct cpu *cpu = container_of(dev, struct cpu, dev);
-   ssize_t ret;
-   long snooze;
-
-   ret = sscanf(buf, "%ld", &snooze);
-   if (ret != 1)
-   return -EINVAL;
-
-   per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+   pr_warn_ratelimited("%s (%d) used unsupported smt_snooze_delay, this has no effect\n",
+   current->comm, current->pid);
return count;
 }
 
@@ -62,9 +59,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
 struct device_attribute *attr,
 char *buf)
 {
-   struct cpu *cpu = container_of(dev, struct cpu, dev);
-
-   return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+   pr_warn_ratelimited("%s (%d) used unsupported smt_snooze_delay, this has no effect\n",
+   current->comm, current->pid);
+   return sprintf(buf, "100\n");
 }
 
 static DEVICE_ATTR(smt_snooze_delay, 0644, 

Re: [PATCH 1/3] powerpc: inline doorbell sending functions

2020-06-29 Thread Nicholas Piggin
Excerpts from Michael Ellerman's message of June 30, 2020 11:31 am:
> kernel test robot  writes:
>> Hi Nicholas,
>>
>> I love your patch! Yet something to improve:
>>
>> [auto build test ERROR on powerpc/next]
>> [also build test ERROR on scottwood/next v5.8-rc2 next-20200626]
>> [cannot apply to kvm-ppc/kvm-ppc-next]
>> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
>> https://git-scm.com/docs/git-format-patch]
>>
>> url:
>> https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-pseries-IPI-doorbell-improvements/20200627-230544
>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
>> next
>> config: powerpc-randconfig-c003-20200628 (attached as .config)
>> compiler: powerpc64-linux-gcc (GCC) 9.3.0
> 
>> If you fix the issue, kindly add following tag as appropriate
>> Reported-by: kernel test robot 
>>
>> All error/warnings (new ones prefixed by >>):
>>
>>In file included from arch/powerpc/kernel/asm-offsets.c:38:
>>arch/powerpc/include/asm/dbell.h: In function 'doorbell_global_ipi':
 arch/powerpc/include/asm/dbell.h:114:12: error: implicit declaration of 
 function 'get_hard_smp_processor_id'; did you mean 'raw_smp_processor_id'? 
 [-Werror=implicit-function-declaration]
>>  114 |  u32 tag = get_hard_smp_processor_id(cpu);
>>  |^
>>  |raw_smp_processor_id
>>arch/powerpc/include/asm/dbell.h: In function 'doorbell_try_core_ipi':
 arch/powerpc/include/asm/dbell.h:146:28: error: implicit declaration of 
 function 'cpu_sibling_mask'; did you mean 'cpu_online_mask'? 
 [-Werror=implicit-function-declaration]
>>  146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>>  |^~~~
>>  |cpu_online_mask
 arch/powerpc/include/asm/dbell.h:146:28: warning: passing argument 2 of 
 'cpumask_test_cpu' makes pointer from integer without a cast 
 [-Wint-conversion]
>>  146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>>  |^~
> 
> Seems like CONFIG_SMP=n is probably the root cause.
> 
> You could try including asm/smp.h, but good chance that will lead to
> header soup.

Possibly. dbell.h shouldn't be included by much, but maybe it gets
dragged in.

> 
> Other option would be to wrap the whole lot in #ifdef CONFIG_SMP?

Yeah that might be a better idea.

I'll fix it up and repost if there's no strong objections to
the KVM detection bit.

Thanks,
Nick


Re: [PATCH v6 7/8] powerpc/pmem: Add WARN_ONCE to catch the wrong usage of pmem flush functions.

2020-06-29 Thread Dan Williams
On Mon, Jun 29, 2020 at 6:58 AM Aneesh Kumar K.V
 wrote:
>
> We only support persistent memory on P8 and above. This is enforced by the
> firmware and further checked on virtualized platforms during platform init.
> Add WARN_ONCE in pmem flush routines to catch the wrong usage of these.
>
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/include/asm/cacheflush.h | 2 ++
>  arch/powerpc/lib/pmem.c   | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index 95782f77d768..1ab0fa660497 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -103,6 +103,8 @@ static inline void  arch_pmem_flush_barrier(void)
>  {
> if (cpu_has_feature(CPU_FTR_ARCH_207S))
> asm volatile(PPC_PHWSYNC ::: "memory");
> +   else
> +   WARN_ONCE(1, "Using pmem flush on older hardware.");

This seems too late to be making this determination. I'd expect the
driver to fail to successfully bind default if this constraint is not
met.


Re: [PATCH v6 6/8] powerpc/pmem: Avoid the barrier in flush routines

2020-06-29 Thread Dan Williams
On Mon, Jun 29, 2020 at 1:41 PM Aneesh Kumar K.V
 wrote:
>
> Michal Suchánek  writes:
>
> > Hello,
> >
> > On Mon, Jun 29, 2020 at 07:27:20PM +0530, Aneesh Kumar K.V wrote:
> >> nvdimm expects the flush routines to just mark the cache clean. The barrier
> >> that marks the stores globally visible is done in nvdimm_flush().
> >>
> >> Update the papr_scm driver to a simplified nvdimm_flush callback that does
> >> only the required barrier.
> >>
> >> Signed-off-by: Aneesh Kumar K.V 
> >> ---
> >>  arch/powerpc/lib/pmem.c   |  6 --
> >>  arch/powerpc/platforms/pseries/papr_scm.c | 13 +
> >>  2 files changed, 13 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
> >> index 5a61aaeb6930..21210fa676e5 100644
> >> --- a/arch/powerpc/lib/pmem.c
> >> +++ b/arch/powerpc/lib/pmem.c
> >> @@ -19,9 +19,6 @@ static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
> >>
> >>  for (i = 0; i < size >> shift; i++, addr += bytes)
> >>  asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): 
> >> "memory");
> >> -
> >> -
> >> -asm volatile(PPC_PHWSYNC ::: "memory");
> >>  }
> >>
> >>  static inline void __flush_pmem_range(unsigned long start, unsigned long 
> >> stop)
> >> @@ -34,9 +31,6 @@ static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
> >>
> >>  for (i = 0; i < size >> shift; i++, addr += bytes)
> >>  asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): 
> >> "memory");
> >> -
> >> -
> >> -asm volatile(PPC_PHWSYNC ::: "memory");
> >>  }
> >>
> >>  static inline void clean_pmem_range(unsigned long start, unsigned long 
> >> stop)
> >> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> >> index 9c569078a09f..9a9a0766f8b6 100644
> >> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> >> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> >> @@ -630,6 +630,18 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
> >>
> >>  return 0;
> >>  }
> >> +/*
> >> + * We have made sure the pmem writes are done such that before calling 
> >> this
> >> + * all the caches are flushed/clean. We use dcbf/dcbfps to ensure this. 
> >> Here
> >> + * we just need to add the necessary barrier to make sure the above 
> >> flushes
> >> + * are have updated persistent storage before any data access or data 
> >> transfer
> >> + * caused by subsequent instructions is initiated.
> >> + */
> >> +static int papr_scm_flush_sync(struct nd_region *nd_region, struct bio *bio)
> >> +{
> >> +arch_pmem_flush_barrier();
> >> +return 0;
> >> +}
> >>
> >>  static ssize_t flags_show(struct device *dev,
> >>struct device_attribute *attr, char *buf)
> >> @@ -743,6 +755,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> >>  ndr_desc.mapping = 
> >>  ndr_desc.num_mappings = 1;
> >>  ndr_desc.nd_set = >nd_set;
> >> +ndr_desc.flush = papr_scm_flush_sync;
> >
> > AFAICT currently the only device that implements flush is virtio_pmem.
> > How does the nfit driver get away without implementing flush?
>
> generic_nvdimm_flush does the required barrier for nfit. The reason for
> adding ndr_desc.flush call back for papr_scm was to avoid the usage
> of iomem based deep flushing (ndr_region_data.flush_wpq) which is not
> supported by papr_scm.
>
> BTW we do return NULL for ndrd_get_flush_wpq() on power. So the upstream
> code also does the same thing, but in a different way.
>
>
> > Also the flush takes arguments that are completely unused but a user of
> > the pmem region must assume they are used, and call flush() on the
> > region rather than arch_pmem_flush_barrier() directly.
>
> The bio argument can help a pmem driver to do range based flushing in
> case of pmem_make_request. If bio is null then we must assume a full
> device flush.

The bio argument isn't for range based flushing, it is for flush
operations that need to complete asynchronously.

There's no mechanism for the block layer to communicate range based
cache flushing, block-device flushing is assumed to be the device's
entire cache. For pmem that would be the entirety of the cpu cache.
Instead of modeling the cpu cache as a storage device cache it is
modeled as page-cache. Once the fs-layer writes back page-cache /
cpu-cache the storage device is only responsible for flushing those
cache-writes into the persistence domain.

Additionally there is a concept of deep-flush that relegates some
power-fail scenarios to a smaller failure domain. For example consider
the difference between a write arriving at the head of a device-queue
and successfully traversing a device-queue to media. The expectation
of pmem applications is that data is persisted once they reach the
equivalent of the x86 ADR domain, deep-flush is past ADR.


Re: [PATCH v6 5/8] powerpc/pmem/of_pmem: Update of_pmem to use the new barrier instruction.

2020-06-29 Thread Dan Williams
On Mon, Jun 29, 2020 at 6:58 AM Aneesh Kumar K.V
 wrote:
>
> of_pmem on POWER10 can now use phwsync instead of hwsync to ensure
> all previous writes are architecturally visible for the platform
> buffer flush.
>
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/include/asm/cacheflush.h | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index 54764c6e922d..95782f77d768 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -98,6 +98,13 @@ static inline void invalidate_dcache_range(unsigned long start,
> mb();   /* sync */
>  }
>
> +#define arch_pmem_flush_barrier arch_pmem_flush_barrier
> +static inline void  arch_pmem_flush_barrier(void)
> +{
> +   if (cpu_has_feature(CPU_FTR_ARCH_207S))
> +   asm volatile(PPC_PHWSYNC ::: "memory");

Shouldn't this fallback to a compatible store-fence in an else statement?


Re: [PATCH updated] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier

2020-06-29 Thread Dan Williams
On Mon, Jun 29, 2020 at 1:29 PM Aneesh Kumar K.V
 wrote:
>
> Architectures like ppc64 provide persistent memory specific barriers
> that will ensure that all stores for which the modifications are
> written to persistent storage by preceding dcbfps and dcbstps
> instructions have updated persistent storage before any data
> access or data transfer caused by subsequent instructions is initiated.
> This is in addition to the ordering done by wmb()
>
> Update nvdimm core such that architecture can use barriers other than
> wmb to ensure all previous writes are architecturally visible for
> the platform buffer flush.
>
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  drivers/md/dm-writecache.c   | 2 +-
>  drivers/nvdimm/region_devs.c | 8 
>  include/linux/libnvdimm.h| 4 
>  3 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
> index 74f3c506f084..8c6b6dce64e2 100644
> --- a/drivers/md/dm-writecache.c
> +++ b/drivers/md/dm-writecache.c
> @@ -536,7 +536,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
>  static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
>  {
> if (WC_MODE_PMEM(wc))
> -   wmb();
> +   arch_pmem_flush_barrier();
> else
> ssd_commit_flushed(wc, wait_for_ios);
>  }
> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> index 4502f9c4708d..b308ad09b63d 100644
> --- a/drivers/nvdimm/region_devs.c
> +++ b/drivers/nvdimm/region_devs.c
> @@ -1206,13 +1206,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
> idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
>
> /*
> -* The first wmb() is needed to 'sfence' all previous writes
> -* such that they are architecturally visible for the platform
> -* buffer flush.  Note that we've already arranged for pmem
> +* The first arch_pmem_flush_barrier() is needed to 'sfence' all
> +* previous writes such that they are architecturally visible for
> +* the platform buffer flush. Note that we've already arranged for 
> pmem
>  * writes to avoid the cache via memcpy_flushcache().  The final
>  * wmb() ensures ordering for the NVDIMM flush write.
>  */
> -   wmb();
> +   arch_pmem_flush_barrier();
> for (i = 0; i < nd_region->ndr_mappings; i++)
> if (ndrd_get_flush_wpq(ndrd, i, 0))
> writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
> diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
> index 18da4059be09..66f6c65bd789 100644
> --- a/include/linux/libnvdimm.h
> +++ b/include/linux/libnvdimm.h
> @@ -286,4 +286,8 @@ static inline void arch_invalidate_pmem(void *addr, 
> size_t size)
>  }
>  #endif
>
> +#ifndef arch_pmem_flush_barrier
> +#define arch_pmem_flush_barrier() wmb()
> +#endif

I think it is out of place to define this in libnvdimm.h, and it is odd
to give it such a long name. The other pmem API helpers like
arch_wb_cache_pmem() and arch_invalidate_pmem() are function calls for
libnvdimm driver operations; this barrier is just an instruction and
is closer to wmb() than to the pmem API routines.

Since it is a store fence for pmem, let's just call it pmem_wmb()
and define the generic version in include/linux/compiler.h. It should
probably also be documented alongside dma_wmb() in
Documentation/memory-barriers.txt, explaining why code would use it over
wmb(), and why a symmetric pmem_rmb() is not needed.
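
A sketch of what that could look like (assuming the generic fallback
stays wmb() and ppc64 overrides it; the exact header and naming are
what this thread is deciding):

/* include/linux/compiler.h (or asm-generic/barrier.h) -- generic fallback */
#ifndef pmem_wmb
#define pmem_wmb()	wmb()
#endif

/* arch/powerpc/include/asm/barrier.h -- possible ppc64 override */
#define pmem_wmb()	__asm__ __volatile__(PPC_PHWSYNC ::: "memory")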


Re: [PATCH 1/3] powerpc: inline doorbell sending functions

2020-06-29 Thread Michael Ellerman
kernel test robot  writes:
> Hi Nicholas,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on scottwood/next v5.8-rc2 next-20200626]
> [cannot apply to kvm-ppc/kvm-ppc-next]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:
> https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-pseries-IPI-doorbell-improvements/20200627-230544
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: powerpc-randconfig-c003-20200628 (attached as .config)
> compiler: powerpc64-linux-gcc (GCC) 9.3.0

> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
>
> All error/warnings (new ones prefixed by >>):
>
>In file included from arch/powerpc/kernel/asm-offsets.c:38:
>arch/powerpc/include/asm/dbell.h: In function 'doorbell_global_ipi':
>>> arch/powerpc/include/asm/dbell.h:114:12: error: implicit declaration of 
>>> function 'get_hard_smp_processor_id'; did you mean 'raw_smp_processor_id'? 
>>> [-Werror=implicit-function-declaration]
>  114 |  u32 tag = get_hard_smp_processor_id(cpu);
>  |^
>  |raw_smp_processor_id
>arch/powerpc/include/asm/dbell.h: In function 'doorbell_try_core_ipi':
>>> arch/powerpc/include/asm/dbell.h:146:28: error: implicit declaration of 
>>> function 'cpu_sibling_mask'; did you mean 'cpu_online_mask'? 
>>> [-Werror=implicit-function-declaration]
>  146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>  |^~~~
>  |cpu_online_mask
>>> arch/powerpc/include/asm/dbell.h:146:28: warning: passing argument 2 of 
>>> 'cpumask_test_cpu' makes pointer from integer without a cast 
>>> [-Wint-conversion]
>  146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>  |^~

Seems like CONFIG_SMP=n is probably the root cause.

You could try including asm/smp.h, but good chance that will lead to
header soup.

Other option would be to wrap the whole lot in #ifdef CONFIG_SMP?
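
i.e. something along these lines (sketch only; one helper shown for
illustration, guard untested):

/* arch/powerpc/include/asm/dbell.h -- sketch of the suggested guard */
#ifdef CONFIG_SMP
static inline void doorbell_global_ipi(int cpu)
{
	u32 tag = get_hard_smp_processor_id(cpu);

	kvmppc_set_host_ipi(cpu);
	/* Order previous accesses vs. msgsnd, which is treated as a store */
	ppc_msgsnd_sync();
	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
}
#endif /* CONFIG_SMP */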

cheers


Re: [PATCH] powerpc: Warn about use of smt_snooze_delay

2020-06-29 Thread Michael Ellerman
Joel Stanley  writes:
> It's not done anything for a long time. Save the percpu variable, and
> emit a warning to remind users to not expect it to do anything.
>
> Signed-off-by: Joel Stanley 
> ---
>  arch/powerpc/kernel/sysfs.c | 41 +
>  1 file changed, 14 insertions(+), 27 deletions(-)
>
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 571b3259697e..530ae92bc46d 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -32,29 +32,25 @@
>  
>  static DEFINE_PER_CPU(struct cpu, cpu_devices);
>  
> -/*
> - * SMT snooze delay stuff, 64-bit only for now
> - */
> -
>  #ifdef CONFIG_PPC64
>  
> -/* Time in microseconds we delay before sleeping in the idle loop */
> -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
> +/*
> + * Snooze delay has not been hooked up since 3fa8cad82b94 
> ("powerpc/pseries/cpuidle:
> + * smt-snooze-delay cleanup.") and has been broken even longer. As was 
> foretold in
> + * 2014:
> + *
> + *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to 
> clean
> + *  up the kernel code."
> + *
> + * At some point in the future this code should be removed.
> + */
>  
>  static ssize_t store_smt_snooze_delay(struct device *dev,
> struct device_attribute *attr,
> const char *buf,
> size_t count)
>  {
> - struct cpu *cpu = container_of(dev, struct cpu, dev);
> - ssize_t ret;
> - long snooze;
> -
> - ret = sscanf(buf, "%ld", &snooze);
> - if (ret != 1)
> - return -EINVAL;
> -
> - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
> + WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");

We shouldn't have user-triggerable WARNs.

I think this should just be a pr_warn_ratelimited(), maybe including
current->comm & pid.

cheers


Re: [PATCH v2] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()

2020-06-29 Thread Michael Ellerman
Michael Ellerman  writes:
> Christophe Leroy  writes:
>> Hi Michael,
>>
>> I see this patch is marked as "defered" in patchwork, but I can't see 
>> any related discussion. Is it normal ?
>
> Because it uses the "m<>" constraint which didn't work on GCC 4.6.
>
> https://github.com/linuxppc/issues/issues/297
>
> So we should be able to pick it up for v5.9 hopefully.

It seems to break the build with the kernel.org 4.9.4 compiler and
corenet64_smp_defconfig:

+ make -s CC=powerpc64-linux-gnu-gcc -j 160
In file included from /linux/include/linux/uaccess.h:11:0,
 from /linux/include/linux/sched/task.h:11,
 from /linux/include/linux/sched/signal.h:9,
 from /linux/include/linux/rcuwait.h:6,
 from /linux/include/linux/percpu-rwsem.h:7,
 from /linux/include/linux/fs.h:33,
 from /linux/include/linux/huge_mm.h:8,
 from /linux/include/linux/mm.h:675,
 from /linux/arch/powerpc/kernel/signal_32.c:17:
/linux/arch/powerpc/kernel/signal_32.c: In function 
'save_user_regs.isra.14.constprop':
/linux/arch/powerpc/include/asm/uaccess.h:161:2: error: 'asm' operand has 
impossible constraints
  __asm__ __volatile__( \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:197:12: note: in expansion of macro 
'__put_user_asm'
case 4: __put_user_asm(x, ptr, retval, "stw"); break; \
^
/linux/arch/powerpc/include/asm/uaccess.h:206:2: note: in expansion of macro 
'__put_user_size_allowed'
  __put_user_size_allowed(x, ptr, size, retval);  \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:220:2: note: in expansion of macro 
'__put_user_size'
  __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:96:2: note: in expansion of macro 
'__put_user_nocheck'
  __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
  ^
/linux/arch/powerpc/kernel/signal_32.c:120:7: note: in expansion of macro 
'__put_user'
   if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
   ^
/linux/scripts/Makefile.build:280: recipe for target 
'arch/powerpc/kernel/signal_32.o' failed
make[3]: *** [arch/powerpc/kernel/signal_32.o] Error 1
make[3]: *** Waiting for unfinished jobs
In file included from /linux/include/linux/uaccess.h:11:0,
 from /linux/include/linux/sched/task.h:11,
 from /linux/include/linux/sched/signal.h:9,
 from /linux/include/linux/rcuwait.h:6,
 from /linux/include/linux/percpu-rwsem.h:7,
 from /linux/include/linux/fs.h:33,
 from /linux/include/linux/huge_mm.h:8,
 from /linux/include/linux/mm.h:675,
 from /linux/arch/powerpc/kernel/signal_64.c:12:
/linux/arch/powerpc/kernel/signal_64.c: In function '__se_sys_swapcontext':
/linux/arch/powerpc/include/asm/uaccess.h:319:2: error: 'asm' operand has 
impossible constraints
  __asm__ __volatile__(\
  ^
/linux/arch/powerpc/include/asm/uaccess.h:359:10: note: in expansion of macro 
'__get_user_asm'
  case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:370:2: note: in expansion of macro 
'__get_user_size_allowed'
  __get_user_size_allowed(x, ptr, size, retval);  \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:393:3: note: in expansion of macro 
'__get_user_size'
   __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \
   ^
/linux/arch/powerpc/include/asm/uaccess.h:94:2: note: in expansion of macro 
'__get_user_nocheck'
  __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
  ^
/linux/arch/powerpc/kernel/signal_64.c:672:9: note: in expansion of macro 
'__get_user'
  || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
 ^
/linux/scripts/Makefile.build:280: recipe for target 
'arch/powerpc/kernel/signal_64.o' failed
make[3]: *** [arch/powerpc/kernel/signal_64.o] Error 1
/linux/scripts/Makefile.build:497: recipe for target 'arch/powerpc/kernel' 
failed
make[2]: *** [arch/powerpc/kernel] Error 2
/linux/Makefile:1756: recipe for target 'arch/powerpc' failed
make[1]: *** [arch/powerpc] Error 2
Makefile:185: recipe for target '__sub-make' failed
make: *** [__sub-make] Error 2


cheers


Re: [PATCH] powerpc: Warn about use of smt_snooze_delay

2020-06-29 Thread Joel Stanley
On Mon, 29 Jun 2020 at 10:42, Gautham R Shenoy  wrote:
>
> On Thu, Jun 25, 2020 at 07:33:49PM +0930, Joel Stanley wrote:
> > It's not done anything for a long time. Save the percpu variable, and
> > emit a warning to remind users to not expect it to do anything.
> >
> > Signed-off-by: Joel Stanley 
>
> The only known user of "smt_snooze_delay" is the "ppc64_cpu" utility,
> which uses the presence of this file to assume that the system is SMT
> capable.
>
> Since we have "/sys/devices/system/cpu/smt/" these days, perhaps the
> userspace utility can use that and we can get rid of the file
> altogether ?

I've sent a change to the userspace tool to stop using the file. It
now uses the device tree parsing that was already present to determine
the smt state.

 https://github.com/ibm-power-utilities/powerpc-utils/pull/43

We will want to wait for the userspace tool to propagate through a
release and into distros before we remove the file altogether. I agree
it should be removed in the future.

I've got a v2 of this patch that changes the message to:

 pr_warn_ratelimited("%s (%d) used unsupported smt_snooze_delay, this has no effect\n",
                     current->comm, current->pid);

I'll send that out today.

Cheers,

Joel

>
> FWIW,
> Acked-by: Gautham R. Shenoy 
> > ---
> >  arch/powerpc/kernel/sysfs.c | 41 +
> >  1 file changed, 14 insertions(+), 27 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > index 571b3259697e..530ae92bc46d 100644
> > --- a/arch/powerpc/kernel/sysfs.c
> > +++ b/arch/powerpc/kernel/sysfs.c
> > @@ -32,29 +32,25 @@
> >
> >  static DEFINE_PER_CPU(struct cpu, cpu_devices);
> >
> > -/*
> > - * SMT snooze delay stuff, 64-bit only for now
> > - */
> > -
> >  #ifdef CONFIG_PPC64
> >
> > -/* Time in microseconds we delay before sleeping in the idle loop */
> > -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
> > +/*
> > + * Snooze delay has not been hooked up since 3fa8cad82b94 
> > ("powerpc/pseries/cpuidle:
> > + * smt-snooze-delay cleanup.") and has been broken even longer. As was 
> > foretold in
> > + * 2014:
> > + *
> > + *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to 
> > clean
> > + *  up the kernel code."
> > + *
> > + * At some point in the future this code should be removed.
> > + */
> >
> >  static ssize_t store_smt_snooze_delay(struct device *dev,
> > struct device_attribute *attr,
> > const char *buf,
> > size_t count)
> >  {
> > - struct cpu *cpu = container_of(dev, struct cpu, dev);
> > - ssize_t ret;
> > - long snooze;
> > -
> > - ret = sscanf(buf, "%ld", &snooze);
> > - if (ret != 1)
> > - return -EINVAL;
> > -
> > - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
> > + WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");
> >   return count;
> >  }
> >
> > @@ -62,9 +58,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
> >struct device_attribute *attr,
> >char *buf)
> >  {
> > - struct cpu *cpu = container_of(dev, struct cpu, dev);
> > + WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");
> >
> > - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
> > + return sprintf(buf, "100\n");
> >  }
> >
> >  static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
> > @@ -72,16 +68,7 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, 
> > show_smt_snooze_delay,
> >
> >  static int __init setup_smt_snooze_delay(char *str)
> >  {
> > - unsigned int cpu;
> > - long snooze;
> > -
> > - if (!cpu_has_feature(CPU_FTR_SMT))
> > - return 1;
> > -
> > - snooze = simple_strtol(str, NULL, 10);
> > - for_each_possible_cpu(cpu)
> > - per_cpu(smt_snooze_delay, cpu) = snooze;
> > -
> > + WARN_ON_ONCE("smt-snooze-delay command line option has no effect\n");
> >   return 1;
> >  }
> >  __setup("smt-snooze-delay=", setup_smt_snooze_delay);
> > --
> > 2.27.0
> >


[PATCH] xmon: Reset RCU and soft lockup watchdogs

2020-06-29 Thread Anton Blanchard
I'm seeing RCU warnings when exiting xmon. xmon resets the NMI watchdog,
but does nothing with the RCU stall or soft lockup watchdogs. Add a
helper function that handles all three.

Signed-off-by: Anton Blanchard 
---
 arch/powerpc/xmon/xmon.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 7efe4bc3ccf6..d27944e38b04 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -481,6 +481,13 @@ static inline int unrecoverable_excp(struct pt_regs *regs)
 #endif
 }
 
+static void xmon_touch_watchdogs(void)
+{
+   touch_softlockup_watchdog_sync();
+   rcu_cpu_stall_reset();
+   touch_nmi_watchdog();
+}
+
 static int xmon_core(struct pt_regs *regs, int fromipi)
 {
int cmd = 0;
@@ -718,7 +725,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
else
insert_cpu_bpts();
 
-   touch_nmi_watchdog();
+   xmon_touch_watchdogs();
local_irq_restore(flags);
 
return cmd != 'X' && cmd != EOF;
-- 
2.26.2



[PATCH 1/1] MAINTAINERS: Remove self

2020-06-29 Thread Sam Bobroff
I'm sorry to say I can no longer maintain this position.

Signed-off-by: Sam Bobroff 
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 496fd4eafb68..7e954e4a29e1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13187,7 +13187,6 @@ F:  tools/pci/
 
 PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC
 M: Russell Currey 
-M: Sam Bobroff 
 M: Oliver O'Halloran 
 L: linuxppc-dev@lists.ozlabs.org
 S: Supported
-- 
2.22.0.216.g00a2a96fc9



Re: [PATCH 0/8] mm: cleanup usage of

2020-06-29 Thread Pekka Enberg
On Sat, Jun 27, 2020 at 5:35 PM Mike Rapoport  wrote:
> Most architectures have very similar versions of pXd_alloc_one() and
> pXd_free_one() for intermediate levels of page table.
> These patches add generic versions of these functions in
> <asm-generic/pgalloc.h> and enable use of the generic functions where
> appropriate.
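
For reference, the consolidated generic helper has roughly this shape
(a sketch modeled on the common per-arch pattern the series replaces;
exact gfp flags and ctor handling may differ per level):

/* Sketch of a generic pmd_alloc_one()/pmd_free() pair. */
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	struct page *page;
	gfp_t gfp = GFP_PGTABLE_USER;

	if (mm == &init_mm)
		gfp = GFP_PGTABLE_KERNEL;
	page = alloc_pages(gfp, 0);
	if (!page)
		return NULL;
	if (!pgtable_pmd_page_ctor(page)) {
		__free_pages(page, 0);
		return NULL;
	}
	return (pmd_t *)page_address(page);
}

static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
	BUG_ON((unsigned long)pmd & (PAGE_SIZE - 1));
	pgtable_pmd_page_dtor(virt_to_page(pmd));
	free_page((unsigned long)pmd);
}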

Very nice cleanup series to the page table code!

FWIW:

Reviewed-by: Pekka Enberg 


Re: [PATCH 01/20] nfblock: stop using ->queuedata

2020-06-29 Thread Geert Uytterhoeven
On Mon, Jun 29, 2020 at 9:40 PM Christoph Hellwig  wrote:
> Instead of setting up the queuedata as well just use one private data
> field.
>
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Geert Uytterhoeven 
Acked-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v6 6/8] powerpc/pmem: Avoid the barrier in flush routines

2020-06-29 Thread Aneesh Kumar K.V
Michal Suchánek  writes:

> Hello,
>
> On Mon, Jun 29, 2020 at 07:27:20PM +0530, Aneesh Kumar K.V wrote:
>> nvdimm expects the flush routines to just mark the cache clean. The barrier
>> that marks the store globally visible is done in nvdimm_flush().
>> 
>> Update the papr_scm driver to a simplified nvdimm_flush callback that does
>> only the required barrier.
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>  arch/powerpc/lib/pmem.c   |  6 --
>>  arch/powerpc/platforms/pseries/papr_scm.c | 13 +
>>  2 files changed, 13 insertions(+), 6 deletions(-)
>> 
>> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
>> index 5a61aaeb6930..21210fa676e5 100644
>> --- a/arch/powerpc/lib/pmem.c
>> +++ b/arch/powerpc/lib/pmem.c
>> @@ -19,9 +19,6 @@ static inline void __clean_pmem_range(unsigned long start, 
>> unsigned long stop)
>>  
>>  for (i = 0; i < size >> shift; i++, addr += bytes)
>>  asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
>> -
>> -
>> -asm volatile(PPC_PHWSYNC ::: "memory");
>>  }
>>  
>>  static inline void __flush_pmem_range(unsigned long start, unsigned long 
>> stop)
>> @@ -34,9 +31,6 @@ static inline void __flush_pmem_range(unsigned long start, 
>> unsigned long stop)
>>  
>>  for (i = 0; i < size >> shift; i++, addr += bytes)
>>  asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
>> -
>> -
>> -asm volatile(PPC_PHWSYNC ::: "memory");
>>  }
>>  
>>  static inline void clean_pmem_range(unsigned long start, unsigned long stop)
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
>> b/arch/powerpc/platforms/pseries/papr_scm.c
>> index 9c569078a09f..9a9a0766f8b6 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -630,6 +630,18 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor 
>> *nd_desc,
>>  
>>  return 0;
>>  }
>> +/*
>> + * We have made sure the pmem writes are done such that before calling this
>> + * all the caches are flushed/clean. We use dcbf/dcbfps to ensure this. Here
>> + * we just need to add the necessary barrier to make sure the above flushes
>> + * have updated persistent storage before any data access or data transfer
>> + * caused by subsequent instructions is initiated.
>> + */
>> +static int papr_scm_flush_sync(struct nd_region *nd_region, struct bio *bio)
>> +{
>> +arch_pmem_flush_barrier();
>> +return 0;
>> +}
>>  
>>  static ssize_t flags_show(struct device *dev,
>>struct device_attribute *attr, char *buf)
>> @@ -743,6 +755,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>>  ndr_desc.mapping = &mapping;
>>  ndr_desc.num_mappings = 1;
>>  ndr_desc.nd_set = &p->nd_set;
>> +ndr_desc.flush = papr_scm_flush_sync;
>
> AFAICT currently the only device that implements flush is virtio_pmem.
> How does the nfit driver get away without implementing flush?

generic_nvdimm_flush() does the required barrier for nfit. The reason for
adding the ndr_desc.flush callback for papr_scm was to avoid the use
of iomem-based deep flushing (nd_region_data.flush_wpq), which is not
supported by papr_scm.

BTW we do return NULL for ndrd_get_flush_wpq() on power. So the upstream
code also does the same thing, but in a different way.


> Also the flush takes arguments that are completely unused but a user of
> the pmem region must assume they are used, and call flush() on the
> region rather than arch_pmem_flush_barrier() directly.

The bio argument lets a pmem driver do range-based flushing in the
pmem_make_request case. If bio is NULL then we must assume a full
device flush.
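
For illustration, a hypothetical driver callback could interpret the
arguments like this (the helper names here are invented, not from the
patch):

/* Hypothetical sketch of a range-aware flush callback. */
static int example_pmem_flush(struct nd_region *nd_region, struct bio *bio)
{
	if (!bio) {
		/* No range information: assume a full device flush. */
		example_flush_whole_device(nd_region);	/* hypothetical */
		return 0;
	}

	/* Flush only the range the bio describes. */
	example_flush_range(nd_region, bio->bi_iter.bi_sector,
			    bio_sectors(bio));		/* hypothetical */
	return 0;
}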

> This may not
> work well with md as discussed with an earlier iteration of the patchset.
>

dm-writecache needs some major changes to work with asynchronous pmem
devices. 

-aneesh


[PATCH updated] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier

2020-06-29 Thread Aneesh Kumar K.V
Architectures like ppc64 provide persistent memory specific barriers
that will ensure that all stores for which the modifications are
written to persistent storage by preceding dcbfps and dcbstps
instructions have updated persistent storage before any data
access or data transfer caused by subsequent instructions is initiated.
This is in addition to the ordering done by wmb().

Update nvdimm core such that architecture can use barriers other than
wmb to ensure all previous writes are architecturally visible for
the platform buffer flush.

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/md/dm-writecache.c   | 2 +-
 drivers/nvdimm/region_devs.c | 8 
 include/linux/libnvdimm.h| 4 
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 74f3c506f084..8c6b6dce64e2 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -536,7 +536,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
 static void writecache_commit_flushed(struct dm_writecache *wc, bool 
wait_for_ios)
 {
if (WC_MODE_PMEM(wc))
-   wmb();
+   arch_pmem_flush_barrier();
else
ssd_commit_flushed(wc, wait_for_ios);
 }
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4502f9c4708d..b308ad09b63d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1206,13 +1206,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
 
/*
-* The first wmb() is needed to 'sfence' all previous writes
-* such that they are architecturally visible for the platform
-* buffer flush.  Note that we've already arranged for pmem
+* The first arch_pmem_flush_barrier() is needed to 'sfence' all
+* previous writes such that they are architecturally visible for
+* the platform buffer flush. Note that we've already arranged for pmem
 * writes to avoid the cache via memcpy_flushcache().  The final
 * wmb() ensures ordering for the NVDIMM flush write.
 */
-   wmb();
+   arch_pmem_flush_barrier();
for (i = 0; i < nd_region->ndr_mappings; i++)
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 18da4059be09..66f6c65bd789 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -286,4 +286,8 @@ static inline void arch_invalidate_pmem(void *addr, size_t 
size)
 }
 #endif
 
+#ifndef arch_pmem_flush_barrier
+#define arch_pmem_flush_barrier() wmb()
+#endif
+
 #endif /* __LIBNVDIMM_H__ */
-- 
2.26.2



Re: [PATCH v6 4/8] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier

2020-06-29 Thread Aneesh Kumar K.V
kernel test robot  writes:

> Hi "Aneesh,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on linux-nvdimm/libnvdimm-for-next v5.8-rc3 
> next-20200629]
> [cannot apply to scottwood/next]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:
> https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Support-new-pmem-flush-and-sync-instructions-for-POWER/20200629-223649
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: arc-allyesconfig (attached as .config)
> compiler: arc-elf-gcc (GCC) 9.3.0
> reproduce (this is a W=1 build):
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
> ARCH=arc 
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
>
> All errors (new ones prefixed by >>):
>
>drivers/nvdimm/region_devs.c: In function 'generic_nvdimm_flush':
>>> drivers/nvdimm/region_devs.c:1215:2: error: implicit declaration of 
>>> function 'arch_pmem_flush_barrier' [-Werror=implicit-function-declaration]
> 1215 |  arch_pmem_flush_barrier();
>  |  ^~~
>cc1: some warnings being treated as errors

OK, let's move it back to include/linux/libnvdimm.h. Not all arches
include asm-generic/cacheflush.h.

-aneesh


Re: [PATCH] ASoC: fsl_asrc: Add an option to select internal ratio mode

2020-06-29 Thread Nicolin Chen
On Mon, Jun 29, 2020 at 09:58:35PM +0800, Shengjiu Wang wrote:
> The ASRC not only supports ideal ratio mode, but also supports
> internal ratio mode.
> 
> For internal ratio mode, the clock source rate must be an exact
> integer multiple of the sample rate (no remainder), otherwise there
> is sound distortion.
> 
> Add function fsl_asrc_select_clk() to find a proper clock source for
> internal ratio mode; if such a clock source is available then internal
> ratio mode will be selected.
> 
> With this change, the ideal ratio mode is no longer the only option
> for the user.
> 
> Signed-off-by: Shengjiu Wang 
> ---

> +static int fsl_asrc_select_clk(struct fsl_asrc_priv *asrc_priv,
> +struct fsl_asrc_pair *pair,
> +int in_rate,
> +int out_rate)
> +{
> + struct fsl_asrc_pair_priv *pair_priv = pair->private;
> + struct asrc_config *config = pair_priv->config;
> + int rate[2], select_clk[2]; /* Array size 2 means IN and OUT */
> + int clk_rate, clk_index;
> + int i = 0, j = 0;
> + bool clk_sel[2];
> +
> + rate[0] = in_rate;
> + rate[1] = out_rate;
> +
> + /* Select proper clock source for internal ratio mode */
> + for (j = 0; j < 2; j++) {
> + for (i = 0; i < ASRC_CLK_MAP_LEN; i++) {
> + clk_index = asrc_priv->clk_map[j][i];
> + clk_rate = clk_get_rate(asrc_priv->asrck_clk[clk_index]);

+   /* Only match a perfect clock source with no remainder */

> + if (clk_rate != 0 && (clk_rate / rate[j]) <= 1024 &&
> + (clk_rate % rate[j]) == 0)
> + break;
> + }
> +
> + if (i == ASRC_CLK_MAP_LEN) {
> + select_clk[j] = OUTCLK_ASRCK1_CLK;
> + clk_sel[j] = false;
> + } else {
> + select_clk[j] = i;
> + clk_sel[j] = true;
> + }
> + }
> +
> + /* Switch to ideal ratio mode if there is no proper clock source */
> + if (!clk_sel[IN] || !clk_sel[OUT])
> + select_clk[IN] = INCLK_NONE;

Could get rid of clk_sel:

for (j) {
for (i) {
if (match)
break;
}

clk[j] = i;
}

if (clk[IN] == ASRC_CLK_MAP_LEN || clk[OUT] == ASRC_CLK_MAP_LEN)

And it only overrides the clk[IN] setting while leaving clk[OUT] at
the search result. This means that clk[OUT] may be using a clock
source other than OUTCLK_ASRCK1_CLK if sel[IN] happens to be false
while sel[OUT] happens to be true. Not sure if that is intended...but
I feel it would probably be safer to use the previous settings:
INCLK_NONE + OUTCLK_ASRCK1_CLK?
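
Concretely, the suggested simplification might look like this (a sketch
reusing the patch's variables; as a sanity check on the remainder test,
a 24.576 MHz source divides 48 kHz exactly (512x) but not 44.1 kHz):

/* Sketch only: drop clk_sel[] and keep IN/OUT consistent on fallback. */
for (j = 0; j < 2; j++) {
	for (i = 0; i < ASRC_CLK_MAP_LEN; i++) {
		clk_index = asrc_priv->clk_map[j][i];
		clk_rate = clk_get_rate(asrc_priv->asrck_clk[clk_index]);
		/* Only match a perfect clock source with no remainder */
		if (clk_rate != 0 && clk_rate / rate[j] <= 1024 &&
		    clk_rate % rate[j] == 0)
			break;
	}
	select_clk[j] = i;
}

/* No perfect source for one direction: fall back to ideal ratio mode */
if (select_clk[IN] == ASRC_CLK_MAP_LEN || select_clk[OUT] == ASRC_CLK_MAP_LEN) {
	select_clk[IN] = INCLK_NONE;
	select_clk[OUT] = OUTCLK_ASRCK1_CLK;
}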


[PATCH 20/20] block: remove direct_make_request

2020-06-29 Thread Christoph Hellwig
Now that submit_bio_noacct has a decent blk-mq fast path there is no
more need for this bypass.

Signed-off-by: Christoph Hellwig 
---
 block/blk-core.c  | 28 
 drivers/md/dm.c   |  5 +
 drivers/nvme/host/multipath.c |  2 +-
 include/linux/blkdev.h|  1 -
 4 files changed, 2 insertions(+), 34 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 46e3c0a37cc377..f127d83c4fafa5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1206,34 +1206,6 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio_noacct);
 
-/**
- * direct_make_request - hand a buffer directly to its device driver for I/O
- * @bio:  The bio describing the location in memory and on the device.
- *
- * This function behaves like submit_bio_noacct(), but does not protect
- * against recursion.  Must only be used if the called driver is known
- * to be blk-mq based.
- */
-blk_qc_t direct_make_request(struct bio *bio)
-{
-   struct gendisk *disk = bio->bi_disk;
-
-   if (WARN_ON_ONCE(!disk->queue->mq_ops)) {
-   bio_io_error(bio);
-   return BLK_QC_T_NONE;
-   }
-   if (!submit_bio_checks(bio))
-   return BLK_QC_T_NONE;
-   if (unlikely(bio_queue_enter(bio)))
-   return BLK_QC_T_NONE;
-   if (!blk_crypto_bio_prep(&bio)) {
-   blk_queue_exit(disk->queue);
-   return BLK_QC_T_NONE;
-   }
-   return blk_mq_submit_bio(bio);
-}
-EXPORT_SYMBOL_GPL(direct_make_request);
-
 /**
  * submit_bio - submit a bio to the block device layer for I/O
 * @bio: The &struct bio which describes the I/O
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b32b539dbace56..2cb33896198c4c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1302,10 +1302,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
/* the bio has been remapped so dispatch it */
trace_block_bio_remap(clone->bi_disk->queue, clone,
  bio_dev(io->orig_bio), sector);
-   if (md->type == DM_TYPE_NVME_BIO_BASED)
-   ret = direct_make_request(clone);
-   else
-   ret = submit_bio_noacct(clone);
+   ret = submit_bio_noacct(clone);
break;
case DM_MAPIO_KILL:
free_tio(tio);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f07fa47c251d9d..a986ac52c4cc7f 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -314,7 +314,7 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
trace_block_bio_remap(bio->bi_disk->queue, bio,
  disk_devt(ns->head->disk),
  bio->bi_iter.bi_sector);
-   ret = direct_make_request(bio);
+   ret = submit_bio_noacct(bio);
} else if (nvme_available_path(head)) {
dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b73cfa6a5141df..1cc913ffdbe21e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -853,7 +853,6 @@ static inline void rq_flush_dcache_pages(struct request *rq)
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 blk_qc_t submit_bio_noacct(struct bio *bio);
-extern blk_qc_t direct_make_request(struct bio *bio);
 extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern struct request *blk_get_request(struct request_queue *, unsigned int op,
-- 
2.26.2



[PATCH 19/20] block: shortcut __submit_bio_noacct for blk-mq drivers

2020-06-29 Thread Christoph Hellwig
For blk-mq drivers, bios can only be inserted for the same queue.  So
bypass the complicated sorting logic in __submit_bio_noacct with
a simpler blk-mq submission helper.

Signed-off-by: Christoph Hellwig 
---
 block/blk-core.c | 50 +---
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index b82f48c86e6f7a..46e3c0a37cc377 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1071,20 +1071,6 @@ static noinline_for_stack bool submit_bio_checks(struct 
bio *bio)
return false;
 }
 
-static blk_qc_t do_make_request(struct bio *bio)
-{
-   struct gendisk *disk = bio->bi_disk;
-   blk_qc_t ret = BLK_QC_T_NONE;
-
-   if (blk_crypto_bio_prep(&bio)) {
-   if (!disk->fops->submit_bio)
-   return blk_mq_submit_bio(bio);
-   ret = disk->fops->submit_bio(bio);
-   }
-   blk_queue_exit(disk->queue);
-   return ret;
-}
-
 /*
  * The loop in this function may be a bit non-obvious, and so deserves some
  * explanation:
@@ -1127,7 +1113,11 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
bio_list_on_stack[1] = bio_list_on_stack[0];
bio_list_init(&bio_list_on_stack[0]);
 
-   ret = do_make_request(bio);
+   if (blk_crypto_bio_prep(&bio))
+   ret = bio->bi_disk->fops->submit_bio(bio);
+   else
+   ret = BLK_QC_T_NONE;
+   blk_queue_exit(q);
 
/*
 * Sort new bios into those for a lower level and those for the
@@ -1153,6 +1143,34 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
return ret;
 }
 
+static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+{
+   struct gendisk *disk = bio->bi_disk;
+   struct bio_list bio_list;
+   blk_qc_t ret = BLK_QC_T_NONE;
+
+   bio_list_init(&bio_list);
+   current->bio_list = &bio_list;
+
+   do {
+   WARN_ON_ONCE(bio->bi_disk != disk);
+
+   if (unlikely(bio_queue_enter(bio) != 0))
+   continue;
+
+   if (!blk_crypto_bio_prep(&bio)) {
+   blk_queue_exit(disk->queue);
+   ret = BLK_QC_T_NONE;
+   continue;
+   }
+
+   ret = blk_mq_submit_bio(bio);
+   } while ((bio = bio_list_pop(&bio_list)));
+
+   current->bio_list = NULL;
+   return ret;
+}
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -1182,6 +1200,8 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
return BLK_QC_T_NONE;
}
 
+   if (bio->bi_disk->queue->mq_ops)
+   return __submit_bio_noacct_mq(bio);
return __submit_bio_noacct(bio);
 }
 EXPORT_SYMBOL(submit_bio_noacct);
-- 
2.26.2



[PATCH 18/20] block: refactor submit_bio_noacct

2020-06-29 Thread Christoph Hellwig
Split out a __submit_bio_noacct helper for the actual de-recursion
algorithm, and simplify the loop by using a continue when we can't
enter the queue for a bio.

Signed-off-by: Christoph Hellwig 
---
 block/blk-core.c | 131 +--
 1 file changed, 71 insertions(+), 60 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 1caeb01e127768..b82f48c86e6f7a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1085,6 +1085,74 @@ static blk_qc_t do_make_request(struct bio *bio)
return ret;
 }
 
+/*
+ * The loop in this function may be a bit non-obvious, and so deserves some
+ * explanation:
+ *
+ *  - Before entering the loop, bio->bi_next is NULL (as all callers ensure
+ *that), so we have a list with a single bio.
+ *  - We pretend that we have just taken it off a longer list, so we assign
+ *bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ *bio_list of new bios to be added.  ->submit_bio() may indeed add some more
+ *bios through a recursive call to submit_bio_noacct.  If it did, we find a
+ *non-NULL value in bio_list and re-enter the loop from the top.
+ *  - In this case we really did just take the bio of the top of the list (no
+ *pretending) and so remove it from bio_list, and call into ->submit_bio()
+ *again.
+ *
+ * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
+ * bio_list_on_stack[1] contains bios that were submitted before the current
+ * ->submit_bio, but that haven't been processed yet.
+ */
+static blk_qc_t __submit_bio_noacct(struct bio *bio)
+{
+   struct bio_list bio_list_on_stack[2];
+   blk_qc_t ret = BLK_QC_T_NONE;
+
+   BUG_ON(bio->bi_next);
+
+   bio_list_init(&bio_list_on_stack[0]);
+   current->bio_list = bio_list_on_stack;
+
+   do {
+   struct request_queue *q = bio->bi_disk->queue;
+   struct bio_list lower, same;
+
+   if (unlikely(bio_queue_enter(bio) != 0))
+   continue;
+
+   /*
+* Create a fresh bio_list for all subordinate requests.
+*/
+   bio_list_on_stack[1] = bio_list_on_stack[0];
+   bio_list_init(&bio_list_on_stack[0]);
+
+   ret = do_make_request(bio);
+
+   /*
+* Sort new bios into those for a lower level and those for the
+* same level.
+*/
+   bio_list_init(&lower);
+   bio_list_init(&same);
+   while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+   if (q == bio->bi_disk->queue)
+   bio_list_add(&same, bio);
+   else
+   bio_list_add(&lower, bio);
+
+   /*
+* Now assemble so we handle the lowest level first.
+*/
+   bio_list_merge(&bio_list_on_stack[0], &lower);
+   bio_list_merge(&bio_list_on_stack[0], &same);
+   bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+   } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+   current->bio_list = NULL;
+   return ret;
+}
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -1096,17 +1164,8 @@ static blk_qc_t do_make_request(struct bio *bio)
  */
 blk_qc_t submit_bio_noacct(struct bio *bio)
 {
-   /*
-* bio_list_on_stack[0] contains bios submitted by the current
-* ->submit_bio.
-* bio_list_on_stack[1] contains bios that were submitted before the
-* current ->submit_bio, but that haven't been processed yet.
-*/
-   struct bio_list bio_list_on_stack[2];
-   blk_qc_t ret = BLK_QC_T_NONE;
-
if (!submit_bio_checks(bio))
-   goto out;
+   return BLK_QC_T_NONE;
 
/*
 * We only want one ->submit_bio to be active at a time, else
@@ -1120,58 +1179,10 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
 */
if (current->bio_list) {
bio_list_add(&current->bio_list[0], bio);
-   goto out;
+   return BLK_QC_T_NONE;
}
 
-   /* following loop may be a bit non-obvious, and so deserves some
-* explanation.
-* Before entering the loop, bio->bi_next is NULL (as all callers
-* ensure that) so we have a list with a single bio.
-* We pretend that we have just taken it off a longer list, so
-* we assign bio_list to a pointer to the bio_list_on_stack,
-* thus initialising the bio_list of new bios to be
-* added.  ->submit_bio() may indeed add some more bios
-* through a recursive call to submit_bio_noacct.  If it
-* did, we find a non-NULL value in bio_list and re-enter the loop
-* from the top.  In this case we really did just take the bio
-* of the top 

[PATCH 17/20] block: rename generic_make_request to submit_bio_noacct

2020-06-29 Thread Christoph Hellwig
generic_make_request has always been very confusingly misnamed, so rename
it to submit_bio_noacct to make it clear that it is submit_bio minus
accounting and a few checks.

Signed-off-by: Christoph Hellwig 
---
 Documentation/block/biodoc.rst|  2 +-
 .../fault-injection/fault-injection.rst   |  2 +-
 Documentation/trace/ftrace.rst|  4 +--
 block/bio.c   | 14 +-
 block/blk-core.c  | 23 ---
 block/blk-crypto-fallback.c   |  2 +-
 block/blk-crypto.c|  2 +-
 block/blk-merge.c |  2 +-
 block/blk-throttle.c  |  4 +--
 block/bounce.c|  2 +-
 drivers/block/drbd/drbd_int.h |  6 ++--
 drivers/block/drbd/drbd_main.c|  2 +-
 drivers/block/drbd/drbd_receiver.c|  2 +-
 drivers/block/drbd/drbd_req.c |  2 +-
 drivers/block/drbd/drbd_worker.c  |  2 +-
 drivers/block/pktcdvd.c   |  2 +-
 drivers/lightnvm/pblk-read.c  |  2 +-
 drivers/md/bcache/bcache.h|  2 +-
 drivers/md/bcache/btree.c |  2 +-
 drivers/md/bcache/request.c   |  7 ++---
 drivers/md/dm-cache-target.c  |  6 ++--
 drivers/md/dm-clone-target.c  | 10 +++
 drivers/md/dm-crypt.c |  6 ++--
 drivers/md/dm-delay.c |  2 +-
 drivers/md/dm-era-target.c|  2 +-
 drivers/md/dm-integrity.c |  4 +--
 drivers/md/dm-mpath.c |  2 +-
 drivers/md/dm-raid1.c |  2 +-
 drivers/md/dm-snap-persistent.c   |  2 +-
 drivers/md/dm-snap.c  |  6 ++--
 drivers/md/dm-thin.c  |  4 +--
 drivers/md/dm-verity-target.c |  2 +-
 drivers/md/dm-writecache.c|  2 +-
 drivers/md/dm-zoned-target.c  |  2 +-
 drivers/md/dm.c   | 10 +++
 drivers/md/md-faulty.c|  4 +--
 drivers/md/md-linear.c|  4 +--
 drivers/md/md-multipath.c |  4 +--
 drivers/md/raid0.c|  8 +++---
 drivers/md/raid1.c| 14 +-
 drivers/md/raid10.c   | 28 +--
 drivers/md/raid5.c| 10 +++
 drivers/nvme/host/multipath.c |  2 +-
 include/linux/blkdev.h|  2 +-
 44 files changed, 111 insertions(+), 113 deletions(-)

diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
index 267384159bf793..afda5e30a82e5a 100644
--- a/Documentation/block/biodoc.rst
+++ b/Documentation/block/biodoc.rst
@@ -1036,7 +1036,7 @@ Now the generic block layer performs partition-remapping 
early and thus
 provides drivers with a sector number relative to whole device, rather than
 having to take partition number into account in order to arrive at the true
 sector number. The routine blk_partition_remap() is invoked by
-generic_make_request even before invoking the queue specific ->submit_bio,
+submit_bio_noacct even before invoking the queue specific ->submit_bio,
 so the i/o scheduler also gets to operate on whole disk sector numbers. This
 should typically not require changes to block drivers, it just never gets
 to invoke its own partition sector offset calculations since all bios
diff --git a/Documentation/fault-injection/fault-injection.rst 
b/Documentation/fault-injection/fault-injection.rst
index f51bb21d20e44b..f850ad018b70a8 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -24,7 +24,7 @@ Available fault injection capabilities
 
   injects disk IO errors on devices permitted by setting
   /sys/block//make-it-fail or
-  /sys/block///make-it-fail. (generic_make_request())
+  /sys/block///make-it-fail. (submit_bio_noacct())
 
 - fail_mmc_request
 
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 430a16283103d4..80ba765a82379e 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -1453,7 +1453,7 @@ function-trace, we get a much larger output::
=> __blk_run_queue_uncond
=> __blk_run_queue
=> blk_queue_bio
-   => generic_make_request
+   => submit_bio_noacct
=> submit_bio
=> submit_bh
=> __ext3_get_inode_loc
@@ -1738,7 +1738,7 @@ tracers.
=> __blk_run_queue_uncond
=> __blk_run_queue
=> blk_queue_bio
-   => generic_make_request
+   => submit_bio_noacct
=> submit_bio
=> submit_bh
=> ext3_bread
diff --git a/block/bio.c b/block/bio.c
index fc1299f9d86a24..ef91782fd668ce 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -358,7 

[PATCH 16/20] block: move ->make_request_fn to struct block_device_operations

2020-06-29 Thread Christoph Hellwig
The make_request_fn is a little weird in that it sits directly in
struct request_queue instead of an operation vector.  Replace it with
a block_device_operations method called submit_bio (which describes much
better what it does).  Also remove the request_queue argument to it, as
the queue can be derived pretty trivially from the bio.

Signed-off-by: Christoph Hellwig 
---
 Documentation/block/biodoc.rst|  2 +-
 .../block/writeback_cache_control.rst |  2 +-
 arch/m68k/emu/nfblock.c   |  5 +-
 arch/xtensa/platforms/iss/simdisk.c   |  5 +-
 block/blk-cgroup.c|  2 +-
 block/blk-core.c  | 53 +++
 block/blk-mq.c| 10 ++--
 block/blk.h   |  2 -
 drivers/block/brd.c   |  5 +-
 drivers/block/drbd/drbd_int.h |  2 +-
 drivers/block/drbd/drbd_main.c|  9 ++--
 drivers/block/drbd/drbd_req.c |  2 +-
 drivers/block/null_blk_main.c | 17 --
 drivers/block/pktcdvd.c   | 11 ++--
 drivers/block/ps3vram.c   | 15 +++---
 drivers/block/rsxx/dev.c  |  7 ++-
 drivers/block/umem.c  |  5 +-
 drivers/block/zram/zram_drv.c | 11 ++--
 drivers/lightnvm/core.c   |  8 +--
 drivers/lightnvm/pblk-init.c  | 12 +++--
 drivers/md/bcache/request.c   |  4 +-
 drivers/md/bcache/request.h   |  4 +-
 drivers/md/bcache/super.c | 23 +---
 drivers/md/dm.c   | 23 
 drivers/md/md.c   |  5 +-
 drivers/nvdimm/blk.c  |  5 +-
 drivers/nvdimm/btt.c  |  5 +-
 drivers/nvdimm/pmem.c |  5 +-
 drivers/nvme/host/core.c  |  1 +
 drivers/nvme/host/multipath.c |  5 +-
 drivers/nvme/host/nvme.h  |  1 +
 drivers/s390/block/dcssblk.c  |  9 ++--
 drivers/s390/block/xpram.c|  6 +--
 include/linux/blk-mq.h|  2 +-
 include/linux/blkdev.h|  7 +--
 include/linux/lightnvm.h  |  3 +-
 36 files changed, 153 insertions(+), 140 deletions(-)

diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
index b964796ec9c780..267384159bf793 100644
--- a/Documentation/block/biodoc.rst
+++ b/Documentation/block/biodoc.rst
@@ -1036,7 +1036,7 @@ Now the generic block layer performs partition-remapping 
early and thus
 provides drivers with a sector number relative to whole device, rather than
 having to take partition number into account in order to arrive at the true
 sector number. The routine blk_partition_remap() is invoked by
-generic_make_request even before invoking the queue specific make_request_fn,
+generic_make_request even before invoking the queue specific ->submit_bio,
 so the i/o scheduler also gets to operate on whole disk sector numbers. This
 should typically not require changes to block drivers, it just never gets
 to invoke its own partition sector offset calculations since all bios
diff --git a/Documentation/block/writeback_cache_control.rst 
b/Documentation/block/writeback_cache_control.rst
index 2c752c57c14c62..b208488d0aae85 100644
--- a/Documentation/block/writeback_cache_control.rst
+++ b/Documentation/block/writeback_cache_control.rst
@@ -47,7 +47,7 @@ the Forced Unit Access is implemented.  The REQ_PREFLUSH and 
REQ_FUA flags
 may both be set on a single bio.
 
 
-Implementation details for make_request_fn based block drivers
+Implementation details for bio based block drivers
 --
 
 These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 87e8b1700acd28..92d26c81244134 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -59,7 +59,7 @@ struct nfhd_device {
struct gendisk *disk;
 };
 
-static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
+static blk_qc_t nfhd_submit_bio(struct bio *bio)
 {
struct nfhd_device *dev = bio->bi_disk->private_data;
struct bio_vec bvec;
@@ -93,6 +93,7 @@ static int nfhd_getgeo(struct block_device *bdev, struct 
hd_geometry *geo)
 
 static const struct block_device_operations nfhd_ops = {
.owner  = THIS_MODULE,
+   .submit_bio = nfhd_submit_bio,
.getgeo = nfhd_getgeo,
 };
 
@@ -118,7 +119,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
dev->bsize = bsize;
dev->bshift = ffs(bsize) - 10;
 
-   dev->queue = blk_alloc_queue(nfhd_make_request, NUMA_NO_NODE);
+   dev->queue = 

[PATCH 15/20] block: remove the nr_sectors variable in generic_make_request_checks

2020-06-29 Thread Christoph Hellwig
The variable is only used once, so just open code the bio_sectors()
call there.

Signed-off-by: Christoph Hellwig 
---
 block/blk-core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 37435d0d433564..28f60985dc75cc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -974,7 +974,6 @@ static noinline_for_stack bool
 generic_make_request_checks(struct bio *bio)
 {
struct request_queue *q = bio->bi_disk->queue;
-   int nr_sectors = bio_sectors(bio);
blk_status_t status = BLK_STS_IOERR;
 
might_sleep();
@@ -1007,7 +1006,7 @@ generic_make_request_checks(struct bio *bio)
if (op_is_flush(bio->bi_opf) &&
!test_bit(QUEUE_FLAG_WC, >queue_flags)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
-   if (!nr_sectors) {
+   if (!bio_sectors(bio)) {
status = BLK_STS_OK;
goto end_io;
}
-- 
2.26.2



[PATCH 14/20] block: remove the NULL queue check in generic_make_request_checks

2020-06-29 Thread Christoph Hellwig
All registered disks must have a valid queue pointer, so don't bother to
log a warning for that case.

Signed-off-by: Christoph Hellwig 
---
 block/blk-core.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 95dca74534ff73..37435d0d433564 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -973,22 +973,12 @@ static inline blk_status_t blk_check_zone_append(struct 
request_queue *q,
 static noinline_for_stack bool
 generic_make_request_checks(struct bio *bio)
 {
-   struct request_queue *q;
+   struct request_queue *q = bio->bi_disk->queue;
int nr_sectors = bio_sectors(bio);
blk_status_t status = BLK_STS_IOERR;
-   char b[BDEVNAME_SIZE];
 
might_sleep();
 
-   q = bio->bi_disk->queue;
-   if (unlikely(!q)) {
-   printk(KERN_ERR
-  "generic_make_request: Trying to access "
-   "nonexistent block-device %s (%Lu)\n",
-   bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
-   goto end_io;
-   }
-
/*
 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
 * if queue is not a request based queue.
-- 
2.26.2



[PATCH 12/20] block: remove the request_queue argument from blk_queue_split

2020-06-29 Thread Christoph Hellwig
The queue can be trivially derived from the bio, so pass one less
argument.

Signed-off-by: Christoph Hellwig 
---
 block/blk-merge.c | 21 ++---
 block/blk-mq.c|  2 +-
 block/blk.h   |  3 +--
 drivers/block/drbd/drbd_req.c |  2 +-
 drivers/block/pktcdvd.c   |  2 +-
 drivers/block/ps3vram.c   |  2 +-
 drivers/block/rsxx/dev.c  |  2 +-
 drivers/block/umem.c  |  2 +-
 drivers/lightnvm/pblk-init.c  |  4 ++--
 drivers/md/dm.c   |  2 +-
 drivers/md/md.c   |  2 +-
 drivers/nvme/host/multipath.c |  9 -
 drivers/s390/block/dcssblk.c  |  2 +-
 drivers/s390/block/xpram.c|  2 +-
 include/linux/blkdev.h|  2 +-
 15 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 9c9fb21584b64e..20fa2290604105 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -283,20 +283,20 @@ static struct bio *blk_bio_segment_split(struct 
request_queue *q,
 
 /**
  * __blk_queue_split - split a bio and submit the second half
- * @q:   [in] request queue pointer
  * @bio: [in, out] bio to be split
  * @nr_segs: [out] number of segments in the first bio
  *
  * Split a bio into two bios, chain the two bios, submit the second half and
  * store a pointer to the first half in *@bio. If the second bio is still too
  * big it will be split by a recursive call to this function. Since this
- * function may allocate a new bio from @q->bio_split, it is the responsibility
- * of the caller to ensure that @q is only released after processing of the
+ * function may allocate a new bio from @bio->bi_disk->queue->bio_split, it is
+ * the responsibility of the caller to ensure that
+ * @bio->bi_disk->queue->bio_split is only released after processing of the
  * split bio has finished.
  */
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
-   unsigned int *nr_segs)
+void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
 {
+   struct request_queue *q = (*bio)->bi_disk->queue;
struct bio *split = NULL;
 
switch (bio_op(*bio)) {
@@ -345,20 +345,19 @@ void __blk_queue_split(struct request_queue *q, struct 
bio **bio,
 
 /**
  * blk_queue_split - split a bio and submit the second half
- * @q:   [in] request queue pointer
  * @bio: [in, out] bio to be split
  *
  * Split a bio into two bios, chains the two bios, submit the second half and
  * store a pointer to the first half in *@bio. Since this function may allocate
- * a new bio from @q->bio_split, it is the responsibility of the caller to
- * ensure that @q is only released after processing of the split bio has
- * finished.
+ * a new bio from @bio->bi_disk->queue->bio_split, it is the responsibility of
+ * the caller to ensure that @bio->bi_disk->queue->bio_split is only released
+ * after processing of the split bio has finished.
  */
-void blk_queue_split(struct request_queue *q, struct bio **bio)
+void blk_queue_split(struct bio **bio)
 {
unsigned int nr_segs;
 
-   __blk_queue_split(q, bio, &nr_segs);
+   __blk_queue_split(bio, &nr_segs);
 }
 EXPORT_SYMBOL(blk_queue_split);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 72d3034fe39d87..40b8d8ba894d5e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2086,7 +2086,7 @@ blk_qc_t blk_mq_make_request(struct request_queue *q, 
struct bio *bio)
blk_status_t ret;
 
blk_queue_bounce(q, &bio);
-   __blk_queue_split(q, &bio, &nr_segs);
+   __blk_queue_split(&bio, &nr_segs);
 
if (!bio_integrity_prep(bio))
goto queue_exit;
diff --git a/block/blk.h b/block/blk.h
index 41a50880c94e98..90416cdc40a36a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -225,8 +225,7 @@ ssize_t part_timeout_show(struct device *, struct 
device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
 
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
-   unsigned int *nr_segs);
+void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
 int ll_front_merge_fn(struct request *req,  struct bio *bio,
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3f09b2ab977822..9368680474223a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1598,7 +1598,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, 
struct bio *bio)
struct drbd_device *device = bio->bi_disk->private_data;
unsigned long start_jif;
 
-   blk_queue_split(q, &bio);
+   blk_queue_split(&bio);
 
start_jif = jiffies;
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 27a33adc41e487..29b0c62dc86c1f 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2434,7 +2434,7 @@ static blk_qc_t 

[PATCH 13/20] block: tidy up a warning in bio_check_ro

2020-06-29 Thread Christoph Hellwig
The "generic_make_request: " prefix has no value, and will soon become
stale.

Signed-off-by: Christoph Hellwig 
---
 block/blk-core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 76cfd5709f66cd..95dca74534ff73 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -869,8 +869,7 @@ static inline bool bio_check_ro(struct bio *bio, struct 
hd_struct *part)
return false;
 
WARN_ONCE(1,
-  "generic_make_request: Trying to write "
-   "to read-only block-device %s (partno %d)\n",
+  "Trying to write to read-only block-device %s (partno 
%d)\n",
bio_devname(bio, b), part->partno);
/* Older lvm-tools actually trigger this */
return false;
-- 
2.26.2



[PATCH 11/20] fs: remove a weird comment in submit_bh_wbc

2020-06-29 Thread Christoph Hellwig
All bios can get remapped if submitted to partitions.  No need to
comment on that.

Signed-off-by: Christoph Hellwig 
---
 fs/buffer.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 64fe82ec65ff1f..2725ebbcfdc246 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3040,12 +3040,7 @@ static int submit_bh_wbc(int op, int op_flags, struct 
buffer_head *bh,
if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
clear_buffer_write_io_error(bh);
 
-   /*
-* from here on down, it's all bio -- do the initial mapping,
-* submit_bio -> generic_make_request may further map this bio around
-*/
bio = bio_alloc(GFP_NOIO, 1);
-
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
-- 
2.26.2



[PATCH 10/20] dm: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well just use one private data
field.

Signed-off-by: Christoph Hellwig 
---
 drivers/md/dm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e44473fe0f4873..c8d91f271c272e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1789,7 +1789,7 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
 
 static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
 {
-   struct mapped_device *md = q->queuedata;
+   struct mapped_device *md = bio->bi_disk->private_data;
blk_qc_t ret = BLK_QC_T_NONE;
int srcu_idx;
struct dm_table *map;
@@ -1995,7 +1995,6 @@ static struct mapped_device *alloc_dev(int minor)
md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
if (!md->queue)
goto bad;
-   md->queue->queuedata = md;
 
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
-- 
2.26.2



[PATCH 01/20] nfblock: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well just use one private data
field.

Signed-off-by: Christoph Hellwig 
---
 arch/m68k/emu/nfblock.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index c3a630440512e9..87e8b1700acd28 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -61,7 +61,7 @@ struct nfhd_device {
 
 static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
 {
-   struct nfhd_device *dev = queue->queuedata;
+   struct nfhd_device *dev = bio->bi_disk->private_data;
struct bio_vec bvec;
struct bvec_iter iter;
int dir, len, shift;
@@ -122,7 +122,6 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
if (dev->queue == NULL)
goto free_dev;
 
-   dev->queue->queuedata = dev;
blk_queue_logical_block_size(dev->queue, bsize);
 
dev->disk = alloc_disk(16);
-- 
2.26.2



[PATCH 04/20] null_blk: stop using ->queuedata for bio mode

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well just use one private data
field.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/null_blk_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 82259242b9b5c9..93ce0a00b2ae01 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1392,7 +1392,7 @@ static blk_qc_t null_queue_bio(struct request_queue *q, 
struct bio *bio)
 {
sector_t sector = bio->bi_iter.bi_sector;
sector_t nr_sectors = bio_sectors(bio);
-   struct nullb *nullb = q->queuedata;
+   struct nullb *nullb = bio->bi_disk->private_data;
struct nullb_queue *nq = nullb_to_queue(nullb);
struct nullb_cmd *cmd;
 
-- 
2.26.2



[PATCH 03/20] drbd: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well, just use the one private data
field.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/drbd/drbd_main.c | 1 -
 drivers/block/drbd/drbd_req.c  | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 45fbd526c453bc..26f4e0aa7393b4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2805,7 +2805,6 @@ enum drbd_ret_code drbd_create_device(struct 
drbd_config_context *adm_ctx, unsig
if (!q)
goto out_no_q;
device->rq_queue = q;
-   q->queuedata   = device;
 
disk = alloc_disk(1);
if (!disk)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c80a2f1c3c2a73..3f09b2ab977822 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1595,7 +1595,7 @@ void do_submit(struct work_struct *ws)
 
 blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
 {
-   struct drbd_device *device = (struct drbd_device *) q->queuedata;
+   struct drbd_device *device = bio->bi_disk->private_data;
unsigned long start_jif;
 
blk_queue_split(q, &bio);
-- 
2.26.2



[PATCH 05/20] ps3vram: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well, just use the one private data
field.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/ps3vram.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 821d4d8b1d763e..5a1d1d137c7248 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -587,7 +587,7 @@ static struct bio *ps3vram_do_bio(struct 
ps3_system_bus_device *dev,
 
 static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
 {
-   struct ps3_system_bus_device *dev = q->queuedata;
+   struct ps3_system_bus_device *dev = bio->bi_disk->private_data;
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
int busy;
 
@@ -745,7 +745,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
}
 
priv->queue = queue;
-   queue->queuedata = dev;
blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
-- 
2.26.2



[PATCH 02/20] simdisk: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well, just use the one private data
field.

Signed-off-by: Christoph Hellwig 
---
 arch/xtensa/platforms/iss/simdisk.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/xtensa/platforms/iss/simdisk.c 
b/arch/xtensa/platforms/iss/simdisk.c
index 49322b66cda931..31b5020077a059 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -103,7 +103,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned 
long sector,
 
 static blk_qc_t simdisk_make_request(struct request_queue *q, struct bio *bio)
 {
-   struct simdisk *dev = q->queuedata;
+   struct simdisk *dev = bio->bi_disk->private_data;
struct bio_vec bvec;
struct bvec_iter iter;
sector_t sector = bio->bi_iter.bi_sector;
@@ -273,8 +273,6 @@ static int __init simdisk_setup(struct simdisk *dev, int 
which,
goto out_alloc_queue;
}
 
-   dev->queue->queuedata = dev;
-
dev->gd = alloc_disk(SIMDISK_MINORS);
if (dev->gd == NULL) {
pr_err("alloc_disk failed\n");
-- 
2.26.2



[PATCH 06/20] rsxx: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well, just use the one private data
field.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/rsxx/dev.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 3ba07ab30c84f5..6a4d8d26e32cbd 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -119,7 +119,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
 
 static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 {
-   struct rsxx_cardinfo *card = q->queuedata;
+   struct rsxx_cardinfo *card = bio->bi_disk->private_data;
struct rsxx_bio_meta *bio_meta;
blk_status_t st = BLK_STS_IOERR;
 
@@ -267,8 +267,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
card->queue->limits.discard_alignment   = RSXX_HW_BLK_SIZE;
}
 
-   card->queue->queuedata = card;
-
snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
 "rsxx%d", card->disk_id);
card->gendisk->major = card->major;
@@ -289,7 +287,6 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
card->gendisk = NULL;
 
blk_cleanup_queue(card->queue);
-   card->queue->queuedata = NULL;
unregister_blkdev(card->major, DRIVER_NAME);
 }
 
-- 
2.26.2



[PATCH 07/20] umem: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well, just use the one private data
field.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/umem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 1e2aa5ae27963c..5498f1cf36b3fe 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -521,7 +521,8 @@ static int mm_check_plugged(struct cardinfo *card)
 
 static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
 {
-   struct cardinfo *card = q->queuedata;
+   struct cardinfo *card = bio->bi_disk->private_data;
+
pr_debug("mm_make_request %llu %u\n",
 (unsigned long long)bio->bi_iter.bi_sector,
 bio->bi_iter.bi_size);
@@ -888,7 +889,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct 
pci_device_id *id)
card->queue = blk_alloc_queue(mm_make_request, NUMA_NO_NODE);
if (!card->queue)
goto failed_alloc;
-   card->queue->queuedata = card;
 
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
 
-- 
2.26.2



[PATCH 09/20] bcache: stop setting ->queuedata

2020-06-29 Thread Christoph Hellwig
Nothing in bcache actually uses the ->queuedata field.

Signed-off-by: Christoph Hellwig 
---
 drivers/md/bcache/super.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 2014016f9a60d3..21aa168113d30b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -876,7 +876,6 @@ static int bcache_device_init(struct bcache_device *d, 
unsigned int block_size,
return -ENOMEM;
 
d->disk->queue  = q;
-   q->queuedata= d;
q->backing_dev_info->congested_data = d;
q->limits.max_hw_sectors= UINT_MAX;
q->limits.max_sectors   = UINT_MAX;
-- 
2.26.2



rename ->make_request_fn and move it to the block_device_operations

2020-06-29 Thread Christoph Hellwig
Hi Jens,

this series moves the make_request_fn method into block_device_operations
with the much more descriptive ->submit_bio name.  It then also gives
generic_make_request a more descriptive name, and further optimizes the
path for issuing to blk-mq, removing the need for the direct_make_request
bypass.

Diffstat:
 Documentation/block/biodoc.rst|2 
 Documentation/block/writeback_cache_control.rst   |2 
 Documentation/fault-injection/fault-injection.rst |2 
 Documentation/trace/ftrace.rst|4 
 arch/m68k/emu/nfblock.c   |8 
 arch/xtensa/platforms/iss/simdisk.c   |9 
 block/bio.c   |   14 -
 block/blk-cgroup.c|2 
 block/blk-core.c  |  255 +-
 block/blk-crypto-fallback.c   |2 
 block/blk-crypto.c|2 
 block/blk-merge.c |   23 -
 block/blk-mq.c|   12 -
 block/blk-throttle.c  |4 
 block/blk.h   |5 
 block/bounce.c|2 
 drivers/block/brd.c   |5 
 drivers/block/drbd/drbd_int.h |8 
 drivers/block/drbd/drbd_main.c|   12 -
 drivers/block/drbd/drbd_receiver.c|2 
 drivers/block/drbd/drbd_req.c |8 
 drivers/block/drbd/drbd_worker.c  |2 
 drivers/block/null_blk_main.c |   19 +
 drivers/block/pktcdvd.c   |   15 -
 drivers/block/ps3vram.c   |   20 -
 drivers/block/rsxx/dev.c  |   14 -
 drivers/block/umem.c  |   11 
 drivers/block/zram/zram_drv.c |   14 -
 drivers/lightnvm/core.c   |8 
 drivers/lightnvm/pblk-init.c  |   16 -
 drivers/lightnvm/pblk-read.c  |2 
 drivers/md/bcache/bcache.h|2 
 drivers/md/bcache/btree.c |2 
 drivers/md/bcache/request.c   |   11 
 drivers/md/bcache/request.h   |4 
 drivers/md/bcache/super.c |   24 +-
 drivers/md/dm-cache-target.c  |6 
 drivers/md/dm-clone-target.c  |   10 
 drivers/md/dm-crypt.c |6 
 drivers/md/dm-delay.c |2 
 drivers/md/dm-era-target.c|2 
 drivers/md/dm-integrity.c |4 
 drivers/md/dm-mpath.c |2 
 drivers/md/dm-raid1.c |2 
 drivers/md/dm-snap-persistent.c   |2 
 drivers/md/dm-snap.c  |6 
 drivers/md/dm-thin.c  |4 
 drivers/md/dm-verity-target.c |2 
 drivers/md/dm-writecache.c|2 
 drivers/md/dm-zoned-target.c  |2 
 drivers/md/dm.c   |   41 +--
 drivers/md/md-faulty.c|4 
 drivers/md/md-linear.c|4 
 drivers/md/md-multipath.c |4 
 drivers/md/md.c   |7 
 drivers/md/raid0.c|8 
 drivers/md/raid1.c|   14 -
 drivers/md/raid10.c   |   28 +-
 drivers/md/raid5.c|   10 
 drivers/nvdimm/blk.c  |5 
 drivers/nvdimm/btt.c  |5 
 drivers/nvdimm/pmem.c |5 
 drivers/nvme/host/core.c  |1 
 drivers/nvme/host/multipath.c |   18 -
 drivers/nvme/host/nvme.h  |1 
 drivers/s390/block/dcssblk.c  |   11 
 drivers/s390/block/xpram.c|8 
 fs/buffer.c   |5 
 include/linux/blk-mq.h|2 
 include/linux/blkdev.h|   12 -
 include/linux/lightnvm.h  |3 
 71 files changed, 387 insertions(+), 408 deletions(-)
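
Sketched end state, based on the cover letter above (illustrative only: the
"foo" names are invented and the exact signatures are whatever the series
finally settles on).  The entry point hangs off the disk's
block_device_operations instead of the request_queue:

    #include <linux/blkdev.h>
    #include <linux/module.h>

    struct foo_device {
        /* driver-private state */
    };

    static blk_qc_t foo_submit_bio(struct bio *bio)
    {
        struct foo_device *foo = bio->bi_disk->private_data;

        /* ... queue or perform the I/O ... */
        return BLK_QC_T_NONE;
    }

    static const struct block_device_operations foo_fops = {
        .owner      = THIS_MODULE,
        .submit_bio = foo_submit_bio,   /* formerly queue->make_request_fn */
    };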


[PATCH 08/20] zram: stop using ->queuedata

2020-06-29 Thread Christoph Hellwig
Instead of setting up the queuedata as well, just use the one private data
field.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/zram/zram_drv.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 6e2ad90b17a376..0564e3f384089e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1586,7 +1586,7 @@ static void __zram_make_request(struct zram *zram, struct 
bio *bio)
  */
 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
 {
-   struct zram *zram = queue->queuedata;
+   struct zram *zram = bio->bi_disk->private_data;
 
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
@@ -1912,7 +1912,6 @@ static int zram_add(void)
zram->disk->first_minor = device_id;
zram->disk->fops = _devops;
zram->disk->queue = queue;
-   zram->disk->queue->queuedata = zram;
zram->disk->private_data = zram;
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
 
-- 
2.26.2



Re: [PATCH v6 4/8] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier

2020-06-29 Thread kernel test robot
Hi "Aneesh,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on linux-nvdimm/libnvdimm-for-next v5.8-rc3 
next-20200629]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Support-new-pmem-flush-and-sync-instructions-for-POWER/20200629-223649
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: mips-allyesconfig (attached as .config)
compiler: mips-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   drivers/md/dm-writecache.c: In function 'writecache_commit_flushed':
>> drivers/md/dm-writecache.c:539:3: error: implicit declaration of function 
>> 'arch_pmem_flush_barrier' [-Werror=implicit-function-declaration]
 539 |   arch_pmem_flush_barrier();
 |   ^~~
   cc1: some warnings being treated as errors

vim +/arch_pmem_flush_barrier +539 drivers/md/dm-writecache.c

   535  
   536  static void writecache_commit_flushed(struct dm_writecache *wc, bool 
wait_for_ios)
   537  {
   538  if (WC_MODE_PMEM(wc))
 > 539  arch_pmem_flush_barrier();
   540  else
   541  ssd_commit_flushed(wc, wait_for_ios);
   542  }
   543  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH v3 0/4] Migrate non-migrated pages of a SVM.

2020-06-29 Thread Ram Pai
On Mon, Jun 29, 2020 at 07:23:30AM +0530, Bharata B Rao wrote:
> On Sun, Jun 28, 2020 at 09:41:53PM +0530, Bharata B Rao wrote:
> > On Fri, Jun 19, 2020 at 03:43:38PM -0700, Ram Pai wrote:
> > > The time taken to switch a VM to Secure-VM, increases by the size of the 
> > > VM.  A
> > > 100GB VM takes about 7minutes. This is unacceptable.  This linear 
> > > increase is
> > > caused by a suboptimal behavior by the Ultravisor and the Hypervisor.  The
> > > Ultravisor unnecessarily migrates all the GFN of the VM from 
> > > normal-memory to
> > > secure-memory. It has to just migrate the necessary and sufficient GFNs.
> > > 
> > > However when the optimization is incorporated in the Ultravisor, the 
> > > Hypervisor
> > > starts misbehaving. The Hypervisor has an inbuilt assumption that the 
> > > Ultravisor
> > > will explicitly request to migrate each and every GFN of the VM. If only
> > > necessary and sufficient GFNs are requested for migration, the Hypervisor
> > > continues to manage the remaining GFNs as normal GFNs. This leads to 
> > > memory
> > > corruption, manifested consistently when the SVM reboots.
> > > 
> > > The same is true, when a memory slot is hotplugged into a SVM. The 
> > > Hypervisor
> > > expects the ultravisor to request migration of all GFNs to secure-GFN.  
> > > But at
> > > the same time, the hypervisor is unable to handle any H_SVM_PAGE_IN 
> > > requests
> > > from the Ultravisor, done in the context of UV_REGISTER_MEM_SLOT ucall.  
> > > This
> > > problem manifests as random errors in the SVM, when a memory-slot is
> > > hotplugged.
> > > 
> > > This patch series automatically migrates the non-migrated pages of a SVM,
> > >  and thus solves the problem.
> > 
> > So this is what I understand as the objective of this patchset:
> > 
> > 1. Getting all the pages into the secure memory right when the guest
> >transitions into secure mode is expensive. Ultravisor wants to just get
> >the necessary and sufficient pages in and put the onus on the Hypervisor
> >to mark the remaining pages (w/o actual page-in) as secure during
> >H_SVM_INIT_DONE.
> > 2. During H_SVM_INIT_DONE, you want a way to differentiate the pages that
> >are already secure from the pages that are shared and that are paged-out.
> >For this you are introducing all these new states in HV.
> > 
> > UV knows about the shared GFNs and maintains the state of the same. Hence
> > let HV send all the pages (minus already secured pages) via H_SVM_PAGE_IN
> > and if UV finds any shared pages in them, let it fail the uv-page-in call.
> > Then HV can fail the migration for it  and the page continues to remain
> > shared. With this, you don't need to maintain a state for secured GFN in HV.
> > 
> > In the unlikely case of sending a paged-out page to UV during
> > H_SVM_INIT_DONE, let the page-in succeed and HV will fault on it again
> > if required. With this, you don't need a state in HV to identify a
> > paged-out-but-encrypted state.
> > 
> > Doesn't the above work?
> 
> I see that you want to infact skip the uv-page-in calls from H_SVM_INIT_DONE.
> So that would need the extra states in HV which you are proposing here.

Yes. I want to skip them to speed up the overall ESM switch.

RP


Re: [PATCH] ASoC: fsl_sai: Refine regcache usage with pm runtime

2020-06-29 Thread Nicolin Chen
On Mon, Jun 29, 2020 at 02:42:33PM +0800, Shengjiu Wang wrote:
> When there is a dedicated power domain bound to the device, power will
> be disabled after probing and the registers are not accessible in
> fsl_sai_dai_probe(). So the regcache only needs to be enabled at the end
> of probe(), and regcache_mark_dirty should be moved to the pm runtime
> resume callback function.
> 
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 


Re: [PATCH v6 4/8] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier

2020-06-29 Thread kernel test robot
Hi "Aneesh,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on linux-nvdimm/libnvdimm-for-next v5.8-rc3 
next-20200629]
[cannot apply to scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Support-new-pmem-flush-and-sync-instructions-for-POWER/20200629-223649
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: arc-allyesconfig (attached as .config)
compiler: arc-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   drivers/nvdimm/region_devs.c: In function 'generic_nvdimm_flush':
>> drivers/nvdimm/region_devs.c:1215:2: error: implicit declaration of function 
>> 'arch_pmem_flush_barrier' [-Werror=implicit-function-declaration]
1215 |  arch_pmem_flush_barrier();
 |  ^~~
   cc1: some warnings being treated as errors

vim +/arch_pmem_flush_barrier +1215 drivers/nvdimm/region_devs.c

  1178  
  1179  int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
  1180  {
  1181  int rc = 0;
  1182  
  1183  if (!nd_region->flush)
  1184  rc = generic_nvdimm_flush(nd_region);
  1185  else {
  1186  if (nd_region->flush(nd_region, bio))
  1187  rc = -EIO;
  1188  }
  1189  
  1190  return rc;
  1191  }
  1192  /**
  1193   * nvdimm_flush - flush any posted write queues between the cpu and 
pmem media
  1194   * @nd_region: blk or interleaved pmem region
  1195   */
  1196  int generic_nvdimm_flush(struct nd_region *nd_region)
  1197  {
  1198  struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
  1199  int i, idx;
  1200  
  1201  /*
  1202   * Try to encourage some diversity in flush hint addresses
  1203   * across cpus assuming a limited number of flush hints.
  1204   */
  1205  idx = this_cpu_read(flush_idx);
  1206  idx = this_cpu_add_return(flush_idx, hash_32(current->pid + 
idx, 8));
  1207  
  1208  /*
  1209   * The first arch_pmem_flush_barrier() is needed to 'sfence' all
  1210   * previous writes such that they are architecturally visible 
for
  1211   * the platform buffer flush. Note that we've already arranged 
for pmem
  1212   * writes to avoid the cache via memcpy_flushcache().  The final
  1213   * wmb() ensures ordering for the NVDIMM flush write.
  1214   */
> 1215  arch_pmem_flush_barrier();
  1216  for (i = 0; i < nd_region->ndr_mappings; i++)
  1217  if (ndrd_get_flush_wpq(ndrd, i, 0))
  1218  writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
  1219  wmb();
  1220  
  1221  return 0;
  1222  }
  1223  EXPORT_SYMBOL_GPL(nvdimm_flush);
  1224  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


[Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8

2020-06-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=208181

--- Comment #7 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 289947
  --> https://bugzilla.kernel.org/attachment.cgi?id=289947&action=edit
segment_registers

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8

2020-06-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=208181

--- Comment #6 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 289945
  --> https://bugzilla.kernel.org/attachment.cgi?id=289945&action=edit
block_address_translation

Sure.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [PATCH v6 6/8] powerpc/pmem: Avoid the barrier in flush routines

2020-06-29 Thread Michal Suchánek
Hello,

On Mon, Jun 29, 2020 at 07:27:20PM +0530, Aneesh Kumar K.V wrote:
> nvdimm expects the flush routines to just mark the cache clean. The
> barrier that makes the stores globally visible is done in nvdimm_flush().
> 
> Update the papr_scm driver to a simplified nvdimm_flush callback that does
> only the required barrier.
> 
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/lib/pmem.c   |  6 --
>  arch/powerpc/platforms/pseries/papr_scm.c | 13 +
>  2 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
> index 5a61aaeb6930..21210fa676e5 100644
> --- a/arch/powerpc/lib/pmem.c
> +++ b/arch/powerpc/lib/pmem.c
> @@ -19,9 +19,6 @@ static inline void __clean_pmem_range(unsigned long start, 
> unsigned long stop)
>  
>   for (i = 0; i < size >> shift; i++, addr += bytes)
>   asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
> -
> -
> - asm volatile(PPC_PHWSYNC ::: "memory");
>  }
>  
>  static inline void __flush_pmem_range(unsigned long start, unsigned long 
> stop)
> @@ -34,9 +31,6 @@ static inline void __flush_pmem_range(unsigned long start, 
> unsigned long stop)
>  
>   for (i = 0; i < size >> shift; i++, addr += bytes)
>   asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
> -
> -
> - asm volatile(PPC_PHWSYNC ::: "memory");
>  }
>  
>  static inline void clean_pmem_range(unsigned long start, unsigned long stop)
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
> b/arch/powerpc/platforms/pseries/papr_scm.c
> index 9c569078a09f..9a9a0766f8b6 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -630,6 +630,18 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor 
> *nd_desc,
>  
>   return 0;
>  }
> +/*
> + * We have made sure the pmem writes are done such that before calling this
> + * all the caches are flushed/clean. We use dcbf/dcbfps to ensure this. Here
> + * we just need to add the necessary barrier to make sure the above flushes
> + * have updated persistent storage before any data access or data 
> transfer
> + * caused by subsequent instructions is initiated.
> + */
> +static int papr_scm_flush_sync(struct nd_region *nd_region, struct bio *bio)
> +{
> + arch_pmem_flush_barrier();
> + return 0;
> +}
>  
>  static ssize_t flags_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> @@ -743,6 +755,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> ndr_desc.mapping = &mapping;
> ndr_desc.num_mappings = 1;
> ndr_desc.nd_set = &p->nd_set;
> + ndr_desc.flush = papr_scm_flush_sync;

AFAICT currently the only device that implements flush is virtio_pmem.
How does the nfit driver get away without implementing flush?
Also the flush takes arguments that are completely unused but a user of
the pmem region must assume they are used, and call flush() on the
region rather than arch_pmem_flush_barrier() directly.  This may not
work well with md, as discussed for an earlier iteration of the patchset.

Thanks

Michal


Re: [PATCH 4/8] asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()

2020-06-29 Thread Mike Rapoport
On Sat, Jun 27, 2020 at 08:03:04PM +0100, Matthew Wilcox wrote:
> On Sat, Jun 27, 2020 at 05:34:49PM +0300, Mike Rapoport wrote:
> > More elaborate versions on arm64 and x86 account memory for the user page
> > tables and call to pgtable_pmd_page_ctor() as the part of PMD page
> > initialization.
> > 
> > Move the arm64 version to include/asm-generic/pgalloc.h and use the generic
> > version on several architectures.
> > 
> > The pgtable_pmd_page_ctor() is a NOP when ARCH_ENABLE_SPLIT_PMD_PTLOCK is
> > not enabled, so there is no functional change for most architectures except
> > of the addition of __GFP_ACCOUNT for allocation of user page tables.
> 
> Thanks for including this line; it reminded me that we're not setting
> the PageTable flag on the page, nor accounting it to the zone page stats.
> Hope you don't mind me tagging a patch to do that on as 9/8.

We also never set PageTable flag for early page tables and for the page
tables allocated directly with get_free_page(), e.g PTI, KASAN.

> We could also do with a pud_page_[cd]tor and maybe even p4d/pgd versions.
> But that brings me to the next question -- could/should some of this
> be moved over to asm-generic/pgalloc.h?  The ctor/dtor aren't called
> from anywhere else, and there's value to reducing the total amount of
> code in mm.h, but then there's also value to keeping all the ifdef
> ARCH_ENABLE_SPLIT_PMD_PTLOCK code together too.  So I'm a bit torn.
> What do you think?

-- 
Sincerely yours,
Mike.
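
For context, the generic pmd_alloc_one() under discussion has roughly this
shape (a simplified sketch of what the quoted commit message describes, not
a verbatim copy of the patch):

    static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
    {
        struct page *page;
        gfp_t gfp = GFP_PGTABLE_USER;   /* carries __GFP_ACCOUNT */

        if (mm == &init_mm)
            gfp = GFP_PGTABLE_KERNEL;
        page = alloc_pages(gfp, 0);
        if (!page)
            return NULL;
        /* a NOP unless ARCH_ENABLE_SPLIT_PMD_PTLOCK is enabled */
        if (!pgtable_pmd_page_ctor(page)) {
            __free_pages(page, 0);
            return NULL;
        }
        return (pmd_t *)page_address(page);
    }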


Re: [PATCH 6/8] asm-generic: pgalloc: provide generic pgd_free()

2020-06-29 Thread Geert Uytterhoeven
On Sat, Jun 27, 2020 at 4:36 PM Mike Rapoport  wrote:
> From: Mike Rapoport 
>
> Most architectures define pgd_free() as a wrapper for free_page().
>
> Provide a generic version in asm-generic/pgalloc.h and enable its use for
> most architectures.
>
> Signed-off-by: Mike Rapoport 

For the m68k part:
Acked-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
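
The generic wrapper described in the quoted commit message is essentially a
one-liner (sketch, assuming the common single-page pgd):

    static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
    {
        free_page((unsigned long)pgd);
    }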


Re: [PATCH 1/8] mm: remove unneeded includes of

2020-06-29 Thread Geert Uytterhoeven
On Sat, Jun 27, 2020 at 4:35 PM Mike Rapoport  wrote:
> From: Mike Rapoport 
>
> In the most cases  header is required only for allocations
> of page table memory. Most of the .c files that include that header do not
> use symbols declared in  and do not require that header.
>
> As for the other header files that used to include , it is
> possible to move that include into the .c file that actually uses symbols
> from  and drop the include from the header file.
>
> The process was somewhat automated using
>
> sed -i -E '/[<"]asm\/pgalloc\.h/d' \
> $(grep -L -w -f /tmp/xx \
> $(git grep -E -l '[<"]asm/pgalloc\.h'))
>
> where /tmp/xx contains all the symbols defined in
> arch/*/include/asm/pgalloc.h.
>
> Signed-off-by: Mike Rapoport 

For the m68k part:
Acked-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v5 3/3] mm/page_alloc: Keep memoryless cpuless node 0 offline

2020-06-29 Thread Christopher Lameter
On Wed, 24 Jun 2020, Srikar Dronamraju wrote:

> Currently Linux kernel with CONFIG_NUMA on a system with multiple
> possible nodes, marks node 0 as online at boot.  However in practice,
> there are systems which have node 0 as memoryless and cpuless.

Maybe add something to explain why you are not simply mapping the
existing memory to NUMA node 0 which is after all just a numbering scheme
used by the kernel and can be used arbitrarily?

This could be seen more as a bug in the arch code during the setup of NUMA
nodes. The two nodes are created by the firmware / bootstrap code after
all. Just do not do it?



[Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8

2020-06-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=208181

--- Comment #5 from Christophe Leroy (christophe.le...@csgroup.eu) ---
Can we get a dump of /sys/kernel/debug/powerpc/block_address_translation

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8

2020-06-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=208181

--- Comment #4 from Erhard F. (erhar...@mailbox.org) ---
Erm wait... there is some change.

In 5.8-rc1 stacktrace was:
BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8
Read of size 1 at addr c11c1a80 by task swapper/0

CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc1-PowerMacG4 #2
Call Trace:
[c1ae97d0] [c0a2069c] dump_stack+0xfc/0x158 (unreliable)
[c1ae9800] [c04ac5cc] print_address_description.isra.0+0x30/0x3fc
[c1ae9870] [c04acb28] kasan_report+0x110/0x170
[c1ae98b0] [c0a44c00] strcmp+0x58/0xd8
[c1ae98d0] [c0170790] register_lock_class+0xfa4/0x10a0
[c1ae9990] [c0170a34] __lock_acquire+0x1a8/0x382c
[c1ae9b40] [c016f398] lock_acquire+0x5e0/0x854
[c1ae9c00] [c1144014] _raw_spin_lock_irqsave+0x48/0x70
[c1ae9c20] [c0ccbe84] of_find_property+0x2c/0x5c
[c1ae9c50] [c0ccbec8] of_get_property+0x14/0x6c
[c1ae9c70] [c0cdbcd8] unflatten_dt_nodes+0xc4c/0xcdc
[c1ae9ec0] [c0cdbe90] __unflatten_device_tree+0x114/0x1e0
[c1ae9ef0] [c184a294] unflatten_device_tree+0x38/0x54
[c1ae9f10] [c1808600] setup_arch+0xc8/0x630
[c1ae9f50] [c1803268] start_kernel+0xcc/0x4cc
[c1ae9ff0] [38a0] 0x38a0


In 5.8-rc3 stacktrace is:
BUG: KASAN: stack-out-of-bounds in vprintk_func+0x100/0x4b4
Read of size 4 at addr c1919e14 by task swapper/0

CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc3-PowerMacG4 #2
Call Trace:
[c1ae9c00] [c0a304dc] dump_stack+0xfc/0x158 (unreliable)
[c1ae9c30] [c04ac990] print_address_description.isra.0+0x30/0x3fc
[c1ae9ca0] [c04aceec] kasan_report+0x110/0x170
[c1ae9ce0] [c018c204] vprintk_func+0x100/0x4b4
[c1ae9d10] [c018afd4] printk+0xa8/0xd4
[c1ae9db0] [c003c8c4] __ioremap_caller+0x1c4/0x27c
[c1ae9df0] [c003c394] ioremap+0x20/0x30
[c1ae9e00] [c1813fe4] pmac_feature_init+0x288/0xd90
[c1ae9ed0] [c1812cb0] pmac_probe+0x13c/0x190
[c1ae9ef0] [c001d938] probe_machine+0xe8/0x13c
[c1ae9f10] [c1808614] setup_arch+0xdc/0x630
[c1ae9f50] [c1803268] start_kernel+0xcc/0x4cc
[c1ae9ff0] [38a0] 0x38a0


What stays the same are the two "setbat: no BAT available" messages in both
cases.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8

2020-06-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=208181

Erhard F. (erhar...@mailbox.org) changed:

            What    |Removed |Added
 -------------------+--------+--------
  Attachment #289661|0       |1
         is obsolete|        |

--- Comment #3 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 289937
  --> https://bugzilla.kernel.org/attachment.cgi?id=289937&action=edit
kernel .config (5.8-rc3, PowerMac G4 DP)

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8

2020-06-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=208181

Erhard F. (erhar...@mailbox.org) changed:

            What    |Removed |Added
 -------------------+--------+--------
  Attachment #289659|0       |1
         is obsolete|        |

--- Comment #2 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 289935
  --> https://bugzilla.kernel.org/attachment.cgi?id=289935&action=edit
dmesg (5.8-rc3, INLINE KASAN, PowerMac G4 DP

No change with 5.8-rc3.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[PATCH] ASoC: fsl_asrc: Add an option to select internal ratio mode

2020-06-29 Thread Shengjiu Wang
The ASRC not only supports ideal ratio mode, but also supports
internal ratio mode.

For internal ratio mode, the rate of the clock source must be divisible
by the sample rate with no remainder; otherwise there is sound
distortion.
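
For example (illustrative numbers): a 24.576 MHz clock source divides a
48 kHz sample rate exactly (24576000 / 48000 = 512, which is also within
the 1024 divider limit checked in the code below), so internal ratio mode
can be used; for a 44.1 kHz rate the division leaves a remainder, so the
driver falls back to ideal ratio mode.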

Add function fsl_asrc_select_clk() to find proper clock source for
internal ratio mode, if the clock source is available then internal
ratio mode will be selected.

With this change, the ideal ratio mode is no longer the only option for the user.

Signed-off-by: Shengjiu Wang 
---
 sound/soc/fsl/fsl_asrc.c | 58 ++--
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 95f6a9617b0b..fcafc8ecb131 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -582,11 +582,59 @@ static int fsl_asrc_dai_startup(struct snd_pcm_substream 
*substream,
SNDRV_PCM_HW_PARAM_RATE, _asrc_rate_constraints);
 }
 
+/**
+ * Select proper clock source for internal ratio mode
+ */
+static int fsl_asrc_select_clk(struct fsl_asrc_priv *asrc_priv,
+  struct fsl_asrc_pair *pair,
+  int in_rate,
+  int out_rate)
+{
+   struct fsl_asrc_pair_priv *pair_priv = pair->private;
+   struct asrc_config *config = pair_priv->config;
+   int rate[2], select_clk[2]; /* Array size 2 means IN and OUT */
+   int clk_rate, clk_index;
+   int i = 0, j = 0;
+   bool clk_sel[2];
+
+   rate[0] = in_rate;
+   rate[1] = out_rate;
+
+   /* Select proper clock source for internal ratio mode */
+   for (j = 0; j < 2; j++) {
+   for (i = 0; i < ASRC_CLK_MAP_LEN; i++) {
+   clk_index = asrc_priv->clk_map[j][i];
+   clk_rate = 
clk_get_rate(asrc_priv->asrck_clk[clk_index]);
+   if (clk_rate != 0 && (clk_rate / rate[j]) <= 1024 &&
+   (clk_rate % rate[j]) == 0)
+   break;
+   }
+
+   if (i == ASRC_CLK_MAP_LEN) {
+   select_clk[j] = OUTCLK_ASRCK1_CLK;
+   clk_sel[j] = false;
+   } else {
+   select_clk[j] = i;
+   clk_sel[j] = true;
+   }
+   }
+
+   /* Switch to ideal ratio mode if there is no proper clock source */
+   if (!clk_sel[IN] || !clk_sel[OUT])
+   select_clk[IN] = INCLK_NONE;
+
+   config->inclk = select_clk[IN];
+   config->outclk = select_clk[OUT];
+
+   return 0;
+}
+
 static int fsl_asrc_dai_hw_params(struct snd_pcm_substream *substream,
  struct snd_pcm_hw_params *params,
  struct snd_soc_dai *dai)
 {
struct fsl_asrc *asrc = snd_soc_dai_get_drvdata(dai);
+   struct fsl_asrc_priv *asrc_priv = asrc->private;
struct snd_pcm_runtime *runtime = substream->runtime;
struct fsl_asrc_pair *pair = runtime->private_data;
struct fsl_asrc_pair_priv *pair_priv = pair->private;
@@ -605,8 +653,6 @@ static int fsl_asrc_dai_hw_params(struct snd_pcm_substream 
*substream,
 
config.pair = pair->index;
config.channel_num = channels;
-   config.inclk = INCLK_NONE;
-   config.outclk = OUTCLK_ASRCK1_CLK;
 
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
config.input_format   = params_format(params);
@@ -620,6 +666,14 @@ static int fsl_asrc_dai_hw_params(struct snd_pcm_substream 
*substream,
config.output_sample_rate = rate;
}
 
+   ret = fsl_asrc_select_clk(asrc_priv, pair,
+ config.input_sample_rate,
+ config.output_sample_rate);
+   if (ret) {
+   dev_err(dai->dev, "fail to select clock\n");
+   return ret;
+   }
+
ret = fsl_asrc_config_pair(pair, false);
if (ret) {
dev_err(dai->dev, "fail to config asrc pair\n");
-- 
2.21.0



[PATCH v6 0/8] Support new pmem flush and sync instructions for POWER

2020-06-29 Thread Aneesh Kumar K.V
This patch series enables the usage of new pmem flush and sync instructions on 
POWER
architecture. POWER10 introduces two new variants of dcbf instructions (dcbstps 
and dcbfps)
that can be used to write modified locations back to persistent storage. 
Additionally,
POWER10 also introduces phwsync and plwsync which can be used to establish the order 
of these
writes to persistent storage.

This series exposes these instructions to the rest of the kernel. The existing
dcbf and hwsync instructions in P8 and P9 are adequate to enable appropriate
synchronization with OpenCAPI-hosted persistent storage. Hence the new 
instructions
are added as a variant of the old ones that old hardware won't differentiate.

On POWER10, pmem devices will be represented by a different device tree compat
strings. This ensures that older kernels won't initialize pmem devices on 
POWER10.

With this:
1) vPMEM continues to work since it is a volatile region. That 
doesn't need any flush instructions.

2) pmdk and other user applications get updated to use new instructions
and updated packages are made available to all distributions

3) On newer hardware, the device will appear with a new compat string. 
Hence older distributions won't initialize pmem on newer hardware.

Changes from v5:
* Drop CONFIG_ARCH_MAP_SYNC_DISABLE and related changes

Changes from V4:
* Add namespace specific sychronous fault control.

Changes from V3:
* Add new compat string to be used for the device.
* Use arch_pmem_flush_barrier() in dm-writecache.

Aneesh Kumar K.V (8):
  powerpc/pmem: Restrict papr_scm to P8 and above.
  powerpc/pmem: Add new instructions for persistent storage and sync
  powerpc/pmem: Add flush routines using new pmem store and sync
instruction
  libnvdimm/nvdimm/flush: Allow architecture to override the flush
barrier
  powerpc/pmem/of_pmem: Update of_pmem to use the new barrier
instruction.
  powerpc/pmem: Avoid the barrier in flush routines
  powerpc/pmem: Add WARN_ONCE to catch the wrong usage of pmem flush
functions.
  powerpc/pmem: Initialize pmem device on newer hardware

 arch/powerpc/include/asm/cacheflush.h | 10 +
 arch/powerpc/include/asm/ppc-opcode.h | 12 ++
 arch/powerpc/lib/pmem.c   | 46 +--
 arch/powerpc/platforms/pseries/papr_scm.c | 14 +++
 arch/powerpc/platforms/pseries/pmem.c |  6 +++
 drivers/md/dm-writecache.c|  2 +-
 drivers/nvdimm/of_pmem.c  |  1 +
 drivers/nvdimm/region_devs.c  |  8 ++--
 include/asm-generic/cacheflush.h  |  4 ++
 9 files changed, 94 insertions(+), 9 deletions(-)

-- 
2.26.2



[PATCH v6 8/8] powerpc/pmem: Initialize pmem device on newer hardware

2020-06-29 Thread Aneesh Kumar K.V
With the kernel now supporting the new pmem flush/sync instructions, we can
enable the kernel to initialize the device. On P10 these devices would
appear with a new compatible string. For PAPR device we have

compatible   "ibm,pmemory-v2"

and for OF pmem device we have

compatible   "pmem-region-v2"

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/platforms/pseries/papr_scm.c | 1 +
 drivers/nvdimm/of_pmem.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index 9a9a0766f8b6..617dfa4d7ec1 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -889,6 +889,7 @@ static int papr_scm_remove(struct platform_device *pdev)
 
 static const struct of_device_id papr_scm_match[] = {
{ .compatible = "ibm,pmemory" },
+   { .compatible = "ibm,pmemory-v2" },
{ },
 };
 
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 6826a274a1f1..10dbdcdfb9ce 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -90,6 +90,7 @@ static int of_pmem_region_remove(struct platform_device *pdev)
 
 static const struct of_device_id of_pmem_region_match[] = {
{ .compatible = "pmem-region" },
+   { .compatible = "pmem-region-v2" },
{ },
 };
 
-- 
2.26.2



[PATCH v6 7/8] powerpc/pmem: Add WARN_ONCE to catch the wrong usage of pmem flush functions.

2020-06-29 Thread Aneesh Kumar K.V
We only support persistent memory on P8 and above. This is enforced by the
firmware and further checked on virtualized platforms during platform init.
Add WARN_ONCE in the pmem flush routines to catch wrong usage of them.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/cacheflush.h | 2 ++
 arch/powerpc/lib/pmem.c   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index 95782f77d768..1ab0fa660497 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -103,6 +103,8 @@ static inline void  arch_pmem_flush_barrier(void)
 {
if (cpu_has_feature(CPU_FTR_ARCH_207S))
asm volatile(PPC_PHWSYNC ::: "memory");
+   else
+   WARN_ONCE(1, "Using pmem flush on older hardware.");
 }
 
 #include 
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 21210fa676e5..f40bd908d28d 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -37,12 +37,14 @@ static inline void clean_pmem_range(unsigned long start, 
unsigned long stop)
 {
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return __clean_pmem_range(start, stop);
+   WARN_ONCE(1, "Using pmem flush on older hardware.");
 }
 
 static inline void flush_pmem_range(unsigned long start, unsigned long stop)
 {
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return __flush_pmem_range(start, stop);
+   WARN_ONCE(1, "Using pmem flush on older hardware.");
 }
 
 /*
-- 
2.26.2



[PATCH v6 6/8] powerpc/pmem: Avoid the barrier in flush routines

2020-06-29 Thread Aneesh Kumar K.V
nvdimm expects the flush routines to just mark the cache clean. The barrier
that makes the stores globally visible is done in nvdimm_flush().

Update the papr_scm driver to a simplified nvdimm_flush callback that does
only the required barrier.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/lib/pmem.c   |  6 --
 arch/powerpc/platforms/pseries/papr_scm.c | 13 +
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 5a61aaeb6930..21210fa676e5 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -19,9 +19,6 @@ static inline void __clean_pmem_range(unsigned long start, 
unsigned long stop)
 
for (i = 0; i < size >> shift; i++, addr += bytes)
asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
-
-
-   asm volatile(PPC_PHWSYNC ::: "memory");
 }
 
 static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
@@ -34,9 +31,6 @@ static inline void __flush_pmem_range(unsigned long start, 
unsigned long stop)
 
for (i = 0; i < size >> shift; i++, addr += bytes)
asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
-
-
-   asm volatile(PPC_PHWSYNC ::: "memory");
 }
 
 static inline void clean_pmem_range(unsigned long start, unsigned long stop)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index 9c569078a09f..9a9a0766f8b6 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -630,6 +630,18 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor 
*nd_desc,
 
return 0;
 }
+/*
+ * We have made sure the pmem writes are done such that before calling this
+ * all the caches are flushed/clean. We use dcbf/dcbfps to ensure this. Here
+ * we just need to add the necessary barrier to make sure the above flushes
+ * have updated persistent storage before any data access or data transfer
+ * caused by subsequent instructions is initiated.
+ */
+static int papr_scm_flush_sync(struct nd_region *nd_region, struct bio *bio)
+{
+   arch_pmem_flush_barrier();
+   return 0;
+}
 
 static ssize_t flags_show(struct device *dev,
  struct device_attribute *attr, char *buf)
@@ -743,6 +755,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
ndr_desc.mapping = &mapping;
ndr_desc.num_mappings = 1;
ndr_desc.nd_set = &p->nd_set;
+   ndr_desc.flush = papr_scm_flush_sync;
 
if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
-- 
2.26.2



[PATCH v6 4/8] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier

2020-06-29 Thread Aneesh Kumar K.V
Architectures like ppc64 provide persistent memory specific barriers
that will ensure that all stores for which the modifications are
written to persistent storage by preceding dcbfps and dcbstps
instructions have updated persistent storage before any data
access or data transfer caused by subsequent instructions is initiated.
This is in addition to the ordering done by wmb().

Update nvdimm core such that architecture can use barriers other than
wmb to ensure all previous writes are architecturally visible for
the platform buffer flush.

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/md/dm-writecache.c   | 2 +-
 drivers/nvdimm/region_devs.c | 8 
 include/asm-generic/cacheflush.h | 4 
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 74f3c506f084..8c6b6dce64e2 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -536,7 +536,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
 static void writecache_commit_flushed(struct dm_writecache *wc, bool 
wait_for_ios)
 {
if (WC_MODE_PMEM(wc))
-   wmb();
+   arch_pmem_flush_barrier();
else
ssd_commit_flushed(wc, wait_for_ios);
 }
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4502f9c4708d..b308ad09b63d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1206,13 +1206,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
 
/*
-* The first wmb() is needed to 'sfence' all previous writes
-* such that they are architecturally visible for the platform
-* buffer flush.  Note that we've already arranged for pmem
+* The first arch_pmem_flush_barrier() is needed to 'sfence' all
+* previous writes such that they are architecturally visible for
+* the platform buffer flush. Note that we've already arranged for pmem
 * writes to avoid the cache via memcpy_flushcache().  The final
 * wmb() ensures ordering for the NVDIMM flush write.
 */
-   wmb();
+   arch_pmem_flush_barrier();
for (i = 0; i < nd_region->ndr_mappings; i++)
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index 907fa5d16494..e30a9aa950dc 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -110,4 +110,8 @@ static inline void flush_cache_vunmap(unsigned long start, 
unsigned long end)
memcpy(dst, src, len)
 #endif
 
+#ifndef arch_pmem_flush_barrier
+#define arch_pmem_flush_barrier() wmb()
+#endif
+
 #endif /* _ASM_GENERIC_CACHEFLUSH_H */
-- 
2.26.2



[PATCH v6 5/8] powerpc/pmem/of_pmem: Update of_pmem to use the new barrier instruction.

2020-06-29 Thread Aneesh Kumar K.V
of_pmem on POWER10 can now use phwsync instead of hwsync to ensure
all previous writes are architecturally visible for the platform
buffer flush.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/cacheflush.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index 54764c6e922d..95782f77d768 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -98,6 +98,13 @@ static inline void invalidate_dcache_range(unsigned long 
start,
mb();   /* sync */
 }
 
+#define arch_pmem_flush_barrier arch_pmem_flush_barrier
+static inline void  arch_pmem_flush_barrier(void)
+{
+   if (cpu_has_feature(CPU_FTR_ARCH_207S))
+   asm volatile(PPC_PHWSYNC ::: "memory");
+}
+
 #include 
 
 #endif /* _ASM_POWERPC_CACHEFLUSH_H */
-- 
2.26.2



[PATCH v6 3/8] powerpc/pmem: Add flush routines using new pmem store and sync instruction

2020-06-29 Thread Aneesh Kumar K.V
Start using the "dcbstps; phwsync" sequence for flushing a persistent memory
range. The new instructions are implemented as variants of dcbf and hwsync, and
on P8 and P9 they will be executed as those instructions. We avoid using them on
older hardware. This helps to avoid difficult-to-debug bugs.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/cacheflush.h |  1 +
 arch/powerpc/lib/pmem.c   | 50 ---
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index de600b915a3c..54764c6e922d 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_PPC_BOOK3S_64
 /*
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 0666a8d29596..5a61aaeb6930 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -9,20 +9,62 @@
 
 #include 
 
+static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
+{
+   unsigned long shift = l1_dcache_shift();
+   unsigned long bytes = l1_dcache_bytes();
+   void *addr = (void *)(start & ~(bytes - 1));
+   unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+   unsigned long i;
+
+   for (i = 0; i < size >> shift; i++, addr += bytes)
+   asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
+
+
+   asm volatile(PPC_PHWSYNC ::: "memory");
+}
+
+static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
+{
+   unsigned long shift = l1_dcache_shift();
+   unsigned long bytes = l1_dcache_bytes();
+   void *addr = (void *)(start & ~(bytes - 1));
+   unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+   unsigned long i;
+
+   for (i = 0; i < size >> shift; i++, addr += bytes)
+   asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
+
+
+   asm volatile(PPC_PHWSYNC ::: "memory");
+}
+
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+   if (cpu_has_feature(CPU_FTR_ARCH_207S))
+   return __clean_pmem_range(start, stop);
+}
+
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+   if (cpu_has_feature(CPU_FTR_ARCH_207S))
+   return __flush_pmem_range(start, stop);
+}
+
 /*
  * CONFIG_ARCH_HAS_PMEM_API symbols
  */
 void arch_wb_cache_pmem(void *addr, size_t size)
 {
unsigned long start = (unsigned long) addr;
-   flush_dcache_range(start, start + size);
+   clean_pmem_range(start, start + size);
 }
 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
unsigned long start = (unsigned long) addr;
-   flush_dcache_range(start, start + size);
+   flush_pmem_range(start, start + size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 
@@ -35,7 +77,7 @@ long __copy_from_user_flushcache(void *dest, const void 
__user *src,
unsigned long copied, start = (unsigned long) dest;
 
copied = __copy_from_user(dest, src, size);
-   flush_dcache_range(start, start + size);
+   clean_pmem_range(start, start + size);
 
return copied;
 }
@@ -45,7 +87,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t 
size)
unsigned long start = (unsigned long) dest;
 
memcpy(dest, src, size);
-   flush_dcache_range(start, start + size);
+   clean_pmem_range(start, start + size);
 
return dest;
 }
-- 
2.26.2
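
A worked example of the range arithmetic in __clean_pmem_range() above,
assuming (for illustration) a 128-byte L1 cache line (shift = 7): for
start = 0x1005 and stop = 0x1085, addr rounds down to 0x1000, size becomes
0x1085 - 0x1000 + 127 = 260, and 260 >> 7 = 2, so the loop issues dcbstps
on the two lines at 0x1000 and 0x1080, covering every byte of
[0x1005, 0x1085).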



[PATCH v6 2/8] powerpc/pmem: Add new instructions for persistent storage and sync

2020-06-29 Thread Aneesh Kumar K.V
POWER10 introduces two new variants of dcbf instructions (dcbstps and dcbfps)
that can be used to write modified locations back to persistent storage.

Additionally, POWER10 also introduces phwsync and plwsync, which can be used
to establish the order of these writes to persistent storage.

This patch exposes these instructions to the rest of the kernel. The existing
dcbf and hwsync instructions in P8 and P9 are adequate to enable appropriate
synchronization with OpenCAPI-hosted persistent storage. Hence the new
instructions are added as a variant of the old ones that old hardware
won't differentiate.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/ppc-opcode.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index 2a39c716c343..1ad014e4633e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -219,6 +219,8 @@
 #define PPC_INST_STWCX 0x7c00012d
 #define PPC_INST_LWSYNC0x7c2004ac
 #define PPC_INST_SYNC  0x7c0004ac
+#define PPC_INST_PHWSYNC   0x7c8004ac
+#define PPC_INST_PLWSYNC   0x7ca004ac
 #define PPC_INST_SYNC_MASK 0xfc0007fe
 #define PPC_INST_ISYNC 0x4c00012c
 #define PPC_INST_LXVD2X0x7c000698
@@ -284,6 +286,8 @@
 #define PPC_INST_TABORT0x7c00071d
 #define PPC_INST_TSR   0x7c0005dd
 
+#define PPC_INST_DCBF  0x7c0000ac
+
 #define PPC_INST_NAP   0x4c000364
 #define PPC_INST_SLEEP 0x4c0003a4
 #define PPC_INST_WINKLE0x4c0003e4
@@ -532,6 +536,14 @@
 #define STBCIX(s,a,b)  stringify_in_c(.long PPC_INST_STBCIX | \
   __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
 
+#define PPC_DCBFPS(a, b)        stringify_in_c(.long PPC_INST_DCBF |    \
+                ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
+#define PPC_DCBSTPS(a, b)       stringify_in_c(.long PPC_INST_DCBF |    \
+                ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
+
+#define PPC_PHWSYNC             stringify_in_c(.long PPC_INST_PHWSYNC)
+#define PPC_PLWSYNC             stringify_in_c(.long PPC_INST_PLWSYNC)
+
 /*
  * Define what the VSX XX1 form instructions will look like, then add
  * the 128 bit load store instructions based on that.
-- 
2.26.2
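
A sketch of how these stringified opcode macros get consumed from C inline
assembly, mirroring the usage in the flush-routines patch of this series
(the helper name here is made up):

    static inline void example_clean_line(void *addr)
    {
        /* write the cache line at addr back to persistent storage */
        asm volatile(PPC_DCBSTPS(%0, %1) : : "i"(0), "r"(addr) : "memory");
        /* order the store to persistent storage */
        asm volatile(PPC_PHWSYNC ::: "memory");
    }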



[PATCH v6 1/8] powerpc/pmem: Restrict papr_scm to P8 and above.

2020-06-29 Thread Aneesh Kumar K.V
The PAPR based virtualized persistent memory devices are only supported on
POWER9 and above. In the followup patch, the kernel will switch the persistent
memory cache flush functions to use a new `dcbf` variant instruction. The new
instructions, even though added in ISA 3.1, work even on P8 and P9 because they
are implemented as variants of the existing `dcbf` and `hwsync` and on P8 and
P9 behave as such.

Considering these devices are only supported on P8 and above,  update the driver
to prevent a P7-compat guest from using persistent memory devices.

We don't update of_pmem driver with the same condition, because, on bare-metal,
the firmware enables pmem support only on P9 and above. There the kernel depends
on OPAL firmware to restrict exposing persistent memory related device tree
entries on older hardware. of_pmem.ko is written without any arch dependency and
we don't want to add a ppc64-specific cpu feature check in the of_pmem driver.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/platforms/pseries/pmem.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/pmem.c 
b/arch/powerpc/platforms/pseries/pmem.c
index f860a897a9e0..2347e1038f58 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -147,6 +147,12 @@ const struct of_device_id drc_pmem_match[] = {
 
 static int pseries_pmem_init(void)
 {
+   /*
+* Only supported on POWER8 and above.
+*/
+   if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+   return 0;
+
pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
if (!pmem_node)
return 0;
-- 
2.26.2



Re: [PATCH 1/3] powerpc/cacheinfo: Use cpumap_print to print cpumap

2020-06-29 Thread Tejun Heo
On Mon, Jun 29, 2020 at 04:07:01PM +0530, Srikar Dronamraju wrote:
> Tejun Heo had modified shared_cpu_map_show to use scnprintf instead of
> cpumap_print while adding support for the *pb[l] format.
> Refer commit 0c118b7bd09a ("powerpc: use %*pb[l] to print bitmaps including
> cpumasks and nodemasks")
> 
> cpumap_print_to_pagebuf is a standard function to print cpumap.  With
> commit 9cf79d115f0d ("bitmap: remove explicit newline handling using
> scnprintf format string"), there is no need to print explicit newline and
> trailing null character. cpumap_print_to_pagebuf internally uses
> scnprintf. Hence replace scnprintf with cpumap_print_to_pagebuf.
> 
> Note: shared_cpu_map_show in drivers/base/cacheinfo.c already uses
> cpumap_print_to_pagebuf.
> 
> Before this patch
> # cat /sys/devices/system/cpu0/cache/index1/shared_cpu_map
> 00ff
> 
> #
>   (Notice the extra blank line).
> 
> After this patch
> # cat /sys/devices/system/cpu0/cache/index1/shared_cpu_map
> 00ff
> #
> 
> Cc: Nathan Lynch 
> Cc: Tejun Heo 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Srikar Dronamraju 

Acked-by: Tejun Heo 

Thanks.

-- 
tejun
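
For reference, the conversion the quoted commit message describes boils down
to something like this (simplified sketch; get_shared_cpu_map() is a
hypothetical helper standing in for the real attribute plumbing):

    static ssize_t shared_cpu_map_show(struct kobject *k,
                                       struct kobj_attribute *attr, char *buf)
    {
        const struct cpumask *mask = get_shared_cpu_map(k);

        /* handles the formatting, newline and NUL termination itself */
        return cpumap_print_to_pagebuf(false, buf, mask);
    }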


Re: [PATCH 01/11] kexec_file: allow archs to handle special regions while locating memory hole

2020-06-29 Thread Hari Bathini
Hi Petr,

On 29/06/20 5:09 pm, Petr Tesarik wrote:
> Hi Hari,
> 
> is there any good reason to add two more functions with a very similar
> name to an existing function? AFAICS all you need is a way to call a
> PPC64-specific function from within kexec_add_buffer (PATCH 4/11), so
> you could add something like this:
> 
> int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
> {
>   return 0;
> }
> 
> Call this function from kexec_add_buffer where appropriate and then
> override it for PPC64 (it roughly corresponds to your
> kexec_locate_mem_hole_ppc64() from PATCH 4/11).
> 
> FWIW it would make it easier for me to follow the resulting code.

Right, Petr.

I was trying out a few things before I ended up with what I sent here.
But yeah.. I did realize arch_kexec_locate_mem_hole() would have been better
after sending out v1. Will take care of that in v2.

Thanks
Hari


Re: [PATCH 01/11] kexec_file: allow archs to handle special regions while locating memory hole

2020-06-29 Thread Petr Tesarik
Hi Hari,

is there any good reason to add two more functions with a very similar
name to an existing function? AFAICS all you need is a way to call a
PPC64-specific function from within kexec_add_buffer (PATCH 4/11), so
you could add something like this:

int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
return 0;
}

Call this function from kexec_add_buffer where appropriate and then
override it for PPC64 (it roughly corresponds to your
kexec_locate_mem_hole_ppc64() from PATCH 4/11).
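
A minimal sketch of the call site under that scheme (one plausible shape,
assuming the weak helper above; the exact spot in kexec_add_buffer is of
course up to the actual patch):

int kexec_add_buffer(struct kexec_buf *kbuf)
{
	int ret;

	/* ... validation and alignment of kbuf ... */

	/* arch hook: PPC64 overrides this to avoid its reserved regions */
	ret = arch_kexec_locate_mem_hole(kbuf);
	if (ret)
		return ret;

	/* ... record the resulting segment ... */
	return 0;
}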

FWIW it would make it easier for me to follow the resulting code.

Petr T

On Sat, 27 Jun 2020 00:34:43 +0530
Hari Bathini  wrote:

> Some archs can have special memory regions, within the given memory
> range, which can't be used for the buffer in a kexec segment. As the
> kexec_add_buffer() function is called from generic code as well, add a
> weak arch_kexec_add_buffer() definition for archs to override and take
> care of special regions before trying to locate a memory hole.
> 
> Signed-off-by: Hari Bathini 
> ---
>  include/linux/kexec.h |5 +
>  kernel/kexec_file.c   |   37 +
>  2 files changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 1776eb2..1237682 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -195,6 +195,11 @@ int __weak arch_kexec_apply_relocations(struct purgatory_info *pi,
>   const Elf_Shdr *relsec,
>   const Elf_Shdr *symtab);
>  
> +extern int arch_kexec_add_buffer(struct kexec_buf *kbuf);
> +
> +/* arch_kexec_add_buffer calls this when it is ready */
> +extern int __kexec_add_buffer(struct kexec_buf *kbuf);
> +
>  extern int kexec_add_buffer(struct kexec_buf *kbuf);
>  int kexec_locate_mem_hole(struct kexec_buf *kbuf);
>  
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index bb05fd5..a0b4f7f 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -669,10 +669,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
>   */
>  int kexec_add_buffer(struct kexec_buf *kbuf)
>  {
> -
> - struct kexec_segment *ksegment;
> - int ret;
> -
>   /* Currently adding segment this way is allowed only in file mode */
>   if (!kbuf->image->file_mode)
>   return -EINVAL;
> @@ -696,6 +692,25 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
>   kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
>   kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
>  
> + return arch_kexec_add_buffer(kbuf);
> +}
> +
> +/**
> + * __kexec_add_buffer - arch_kexec_add_buffer would call this function after
> + *  updating kbuf, to place a buffer in a kexec segment.
> + * @kbuf:   Buffer contents and memory parameters.
> + *
> + * This function assumes that kexec_mutex is held.
> + * On successful return, @kbuf->mem will have the physical address of
> + * the buffer in memory.
> + *
> + * Return: 0 on success, negative errno on error.
> + */
> +int __kexec_add_buffer(struct kexec_buf *kbuf)
> +{
> + struct kexec_segment *ksegment;
> + int ret;
> +
>   /* Walk the RAM ranges and allocate a suitable range for the buffer */
>   ret = kexec_locate_mem_hole(kbuf);
>   if (ret)
> @@ -711,6 +726,20 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
>   return 0;
>  }
>  
> +/**
> + * arch_kexec_add_buffer - Some archs have memory regions within the given
> + * range that can't be used to place a kexec segment.
> + * Such archs can override this function to take care
> + * of them before trying to locate the memory hole.
> + * @kbuf:  Buffer contents and memory parameters.
> + *
> + * Return: 0 on success, negative errno on error.
> + */
> +int __weak arch_kexec_add_buffer(struct kexec_buf *kbuf)
> +{
> + return __kexec_add_buffer(kbuf);
> +}
> +
>  /* Calculate and store the digest of segments */
>  static int kexec_calculate_store_digests(struct kimage *image)
>  {
> 





Re: [PATCH v2] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()

2020-06-29 Thread Michael Ellerman
Christophe Leroy  writes:
> Hi Michael,
>
> I see this patch is marked as "deferred" in patchwork, but I can't see 
> any related discussion. Is that normal?

Because it uses the "m<>" constraint which didn't work on GCC 4.6.

https://github.com/linuxppc/issues/issues/297

So we should be able to pick it up for v5.9 hopefully.

cheers


> Le 16/04/2020 à 14:39, Christophe Leroy a écrit :
>> For the time being, __put_user()/__get_user() and friends only use
>> D-form addressing, with 0 offset. Ex:
>> 
>>  lwz reg1, 0(reg2)
>> 
>> Give the compiler the opportunity to use other addressing modes
>> whenever possible, to get more optimised code.
>> 
>> Below is a small example:
>> 
>> struct test {
>>  u32 item1;
>>  u16 item2;
>>  u8 item3;
>>  u64 item4;
>> };
>> 
>> int set_test_user(struct test __user *from, struct test __user *to)
>> {
>>  int err;
>>  u32 item1;
>>  u16 item2;
>>  u8 item3;
>>  u64 item4;
>> 
>>  err = __get_user(item1, &from->item1);
>>  err |= __get_user(item2, &from->item2);
>>  err |= __get_user(item3, &from->item3);
>>  err |= __get_user(item4, &from->item4);
>> 
>>  err |= __put_user(item1, &to->item1);
>>  err |= __put_user(item2, &to->item2);
>>  err |= __put_user(item3, &to->item3);
>>  err |= __put_user(item4, &to->item4);
>> 
>>  return err;
>> }
>> 
>> Before the patch:
>> 
>> 00000df0 <set_test_user>:
>>   df0:   94 21 ff f0 stwu r1,-16(r1)
>>   df4:   39 40 00 00 li  r10,0
>>   df8:   93 c1 00 08 stw r30,8(r1)
>>   dfc:   93 e1 00 0c stw r31,12(r1)
>>   e00:   7d 49 53 78 mr  r9,r10
>>   e04:   80 a3 00 00 lwz r5,0(r3)
>>   e08:   38 e3 00 04 addi r7,r3,4
>>   e0c:   7d 46 53 78 mr  r6,r10
>>   e10:   a0 e7 00 00 lhz r7,0(r7)
>>   e14:   7d 29 33 78 or  r9,r9,r6
>>   e18:   39 03 00 06 addi r8,r3,6
>>   e1c:   7d 46 53 78 mr  r6,r10
>>   e20:   89 08 00 00 lbz r8,0(r8)
>>   e24:   7d 29 33 78 or  r9,r9,r6
>>   e28:   38 63 00 08 addi r3,r3,8
>>   e2c:   7d 46 53 78 mr  r6,r10
>>   e30:   83 c3 00 00 lwz r30,0(r3)
>>   e34:   83 e3 00 04 lwz r31,4(r3)
>>   e38:   7d 29 33 78 or  r9,r9,r6
>>   e3c:   7d 43 53 78 mr  r3,r10
>>   e40:   90 a4 00 00 stw r5,0(r4)
>>   e44:   7d 29 1b 78 or  r9,r9,r3
>>   e48:   38 c4 00 04 addi r6,r4,4
>>   e4c:   7d 43 53 78 mr  r3,r10
>>   e50:   b0 e6 00 00 sth r7,0(r6)
>>   e54:   7d 29 1b 78 or  r9,r9,r3
>>   e58:   38 e4 00 06 addi r7,r4,6
>>   e5c:   7d 43 53 78 mr  r3,r10
>>   e60:   99 07 00 00 stb r8,0(r7)
>>   e64:   7d 23 1b 78 or  r3,r9,r3
>>   e68:   38 84 00 08 addi r4,r4,8
>>   e6c:   93 c4 00 00 stw r30,0(r4)
>>   e70:   93 e4 00 04 stw r31,4(r4)
>>   e74:   7c 63 53 78 or  r3,r3,r10
>>   e78:   83 c1 00 08 lwz r30,8(r1)
>>   e7c:   83 e1 00 0c lwz r31,12(r1)
>>   e80:   38 21 00 10 addi r1,r1,16
>>   e84:   4e 80 00 20 blr
>> 
>> After the patch:
>> 
>> 00000dbc <set_test_user>:
>>   dbc:   39 40 00 00 li  r10,0
>>   dc0:   7d 49 53 78 mr  r9,r10
>>   dc4:   80 03 00 00 lwz r0,0(r3)
>>   dc8:   7d 48 53 78 mr  r8,r10
>>   dcc:   a1 63 00 04 lhz r11,4(r3)
>>   dd0:   7d 29 43 78 or  r9,r9,r8
>>   dd4:   7d 48 53 78 mr  r8,r10
>>   dd8:   88 a3 00 06 lbz r5,6(r3)
>>   ddc:   7d 29 43 78 or  r9,r9,r8
>>   de0:   7d 48 53 78 mr  r8,r10
>>   de4:   80 c3 00 08 lwz r6,8(r3)
>>   de8:   80 e3 00 0c lwz r7,12(r3)
>>   dec:   7d 29 43 78 or  r9,r9,r8
>>   df0:   7d 43 53 78 mr  r3,r10
>>   df4:   90 04 00 00 stw r0,0(r4)
>>   df8:   7d 29 1b 78 or  r9,r9,r3
>>   dfc:   7d 43 53 78 mr  r3,r10
>>   e00:   b1 64 00 04 sth r11,4(r4)
>>   e04:   7d 29 1b 78 or  r9,r9,r3
>>   e08:   7d 43 53 78 mr  r3,r10
>>   e0c:   98 a4 00 06 stb r5,6(r4)
>>   e10:   7d 23 1b 78 or  r3,r9,r3
>>   e14:   90 c4 00 08 stw r6,8(r4)
>>   e18:   90 e4 00 0c stw r7,12(r4)
>>   e1c:   7c 63 53 78 or  r3,r3,r10
>>   e20:   4e 80 00 20 blr
>> 
>> Signed-off-by: Christophe Leroy 
>> Reviewed-by: Segher Boessenkool 
>> ---
>> v2:
>> - Added <> modifier in __put_user_asm() and __get_user_asm()
>> - Removed %U2 in __put_user_asm2() and __get_user_asm2()
>> - Reworded the commit log
>> ---
>>   arch/powerpc/include/asm/uaccess.h | 28 ++--
>>   1 file changed, 14 insertions(+), 14 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>> index 
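
(The quoted diff is truncated in the archive here. As a hedged sketch only --
not the patch itself -- the change boils down to handing GCC an "m<>" memory
operand instead of a hard-coded 0(reg) D-form address:)

static inline unsigned int load_word(const unsigned int *p)
{
	unsigned int val;

	/* "lwz %0, 0(%1)" with a "b"(p) operand forces D-form, offset 0.
	 * An "m<>" operand plus the %U/%X modifiers lets the compiler pick
	 * a real displacement, an indexed lwzx, or an update-form lwzu. */
	asm volatile("lwz%U1%X1 %0, %1" : "=r" (val) : "m<>" (*p));
	return val;
}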

[PATCH v2 2/2] powerpc/ptdump: Refactor update of pg_state

2020-06-29 Thread Christophe Leroy
In note_page(), the pg_state is updated the same way in two places.

Add note_page_update_state() to do it.

Also include the display of boundary markers there, as it is missing from the
"no level" leg, leading to a mismatch when the first two markers
are at the same address and the first displayed area uses that
address.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/ptdump/ptdump.c | 34 +++--
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 20a039867934..c911cd757f7d 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -199,6 +199,24 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
 }
 
+static void note_page_update_state(struct pg_state *st, unsigned long addr,
+  unsigned int level, u64 val, unsigned long page_size)
+{
+   u64 flag = val & pg_level[level].mask;
+   u64 pa = val & PTE_RPN_MASK;
+
+   st->level = level;
+   st->current_flags = flag;
+   st->start_address = addr;
+   st->start_pa = pa;
+   st->page_size = page_size;
+
+   while (addr >= st->marker[1].start_address) {
+   st->marker++;
+   pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+   }
+}
+
 static void note_page(struct pg_state *st, unsigned long addr,
   unsigned int level, u64 val, unsigned long page_size)
 {
@@ -207,12 +225,8 @@ static void note_page(struct pg_state *st, unsigned long addr,
 
/* At first no level is set */
if (!st->level) {
-   st->level = level;
-   st->current_flags = flag;
-   st->start_address = addr;
-   st->start_pa = pa;
-   st->page_size = page_size;
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+   note_page_update_state(st, addr, level, val, page_size);
/*
 * Dump the section of virtual memory when:
 *   - the PTE flags from one entry to the next differs.
@@ -244,15 +258,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 * Address indicates we have passed the end of the
 * current section of virtual memory
 */
-   while (addr >= st->marker[1].start_address) {
-   st->marker++;
-   pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
-   }
-   st->start_address = addr;
-   st->start_pa = pa;
-   st->page_size = page_size;
-   st->current_flags = flag;
-   st->level = level;
+   note_page_update_state(st, addr, level, val, page_size);
}
st->last_pa = pa;
 }
-- 
2.25.0



[PATCH v2 1/2] powerpc/ptdump: Refactor update of st->last_pa

2020-06-29 Thread Christophe Leroy
st->last_pa is always updated in note_page() so it can
be done outside the if/elseif/else block.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/ptdump/ptdump.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index b2ed1ca4f254..20a039867934 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -211,7 +211,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->current_flags = flag;
st->start_address = addr;
st->start_pa = pa;
-   st->last_pa = pa;
st->page_size = page_size;
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
/*
@@ -251,13 +250,11 @@ static void note_page(struct pg_state *st, unsigned long addr,
}
st->start_address = addr;
st->start_pa = pa;
-   st->last_pa = pa;
st->page_size = page_size;
st->current_flags = flag;
st->level = level;
-   } else {
-   st->last_pa = pa;
}
+   st->last_pa = pa;
 }
 
 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
-- 
2.25.0



[PATCH v2 5/6] powerpc/32s: Kernel space starts at TASK_SIZE

2020-06-29 Thread Christophe Leroy
Kernel space starts at TASK_SIZE. Select the kernel page table
when the address is over TASK_SIZE.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.S   | 12 ++--
 arch/powerpc/mm/book3s32/hash_low.S |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 705c042309d8..bbef6ce8322b 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -474,7 +474,7 @@ InstructionTLBMiss:
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_IMISS
 #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
-   lis r1,PAGE_OFFSET@h/* check if kernel address */
+   lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
 #endif
mfspr   r2, SPRN_SPRG_PGDIR
@@ -484,7 +484,7 @@ InstructionTLBMiss:
li  r1,_PAGE_PRESENT | _PAGE_EXEC
 #endif
 #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
-   bge-112f
+   bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, use */
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l   /* kernel page table */
 #endif
@@ -541,7 +541,7 @@ DataLoadTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_DMISS
-   lis r1,PAGE_OFFSET@h/* check if kernel address */
+   lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
mfspr   r2, SPRN_SPRG_PGDIR
 #ifdef CONFIG_SWAP
@@ -549,7 +549,7 @@ DataLoadTLBMiss:
 #else
li  r1, _PAGE_PRESENT
 #endif
-   bge-112f
+   bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, use */
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l   /* kernel page table */
 112:   rlwimi  r2,r3,12,20,29  /* insert top 10 bits of address */
@@ -621,7 +621,7 @@ DataStoreTLBMiss:
  */
/* Get PTE (linux-style) and check access */
mfspr   r3,SPRN_DMISS
-   lis r1,PAGE_OFFSET@h/* check if kernel address */
+   lis r1, TASK_SIZE@h /* check if kernel address */
cmplw   0,r1,r3
mfspr   r2, SPRN_SPRG_PGDIR
 #ifdef CONFIG_SWAP
@@ -629,7 +629,7 @@ DataStoreTLBMiss:
 #else
li  r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
 #endif
-   bge-112f
+   bgt-112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, use */
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l   /* kernel page table */
 112:   rlwimi  r2,r3,12,20,29  /* insert top 10 bits of address */
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index 923ad8f374eb..1690d369688b 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -62,7 +62,7 @@ _GLOBAL(hash_page)
isync
 #endif
/* Get PTE (linux-style) and check access */
-   lis r0,KERNELBASE@h /* check if kernel address */
+   lis r0, TASK_SIZE@h /* check if kernel address */
cmplw   0,r4,r0
ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
mfspr   r5, SPRN_SPRG_PGDIR /* phys page-table root */
-- 
2.25.0



[PATCH v2 6/6] powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX

2020-06-29 Thread Christophe Leroy
When STRICT_KERNEL_RWX is set, we want to set the NX bit on vmalloc
segments, but modules require exec permission.

Use a dedicated segment for modules. There is not much space
above the kernel, and we don't want to waste vmalloc space on alignment;
therefore, take the segment just below PAGE_OFFSET for modules.
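
A worked example, assuming the usual PAGE_OFFSET of 0xc0000000 (illustration
only, not part of the patch):

	MODULES_END   = ALIGN_DOWN(PAGE_OFFSET, SZ_256M) = 0xc0000000
	MODULES_VADDR = MODULES_END - SZ_256M            = 0xb0000000

i.e. modules get the 256MB segment immediately below the kernel, matching
the new TASK_SIZE default below.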

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h | 15 +--
 arch/powerpc/mm/ptdump/ptdump.c  |  8 
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 51abc59c3334..963b3bc7d969 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1194,6 +1194,7 @@ config TASK_SIZE_BOOL
 config TASK_SIZE
hex "Size of user task space" if TASK_SIZE_BOOL
default "0x8000" if PPC_8xx
+   default "0xb000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX
default "0xc000"
 endmenu
 
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 224912432821..36443cda8dcf 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -184,17 +184,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
  */
 #define VMALLOC_OFFSET (0x1000000) /* 16M */
 
-/*
- * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules
- * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear
- * memory shall not share segments.
- */
-#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES)
-#define VMALLOC_START ((ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \
-  ~(VMALLOC_OFFSET - 1))
-#else
 #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
-#endif
 
 #ifdef CONFIG_KASAN_VMALLOC
 #define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
@@ -202,6 +192,11 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, 
pgprot_t prot);
 #define VMALLOC_END ioremap_bot
 #endif
 
+#ifdef CONFIG_STRICT_KERNEL_RWX
+#define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M)
+#define MODULES_VADDR  (MODULES_END - SZ_256M)
+#endif
+
 #ifndef __ASSEMBLY__
 #include 
 #include 
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 9d942136c7be..b2ed1ca4f254 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -74,6 +74,10 @@ struct addr_marker {
 
 static struct addr_marker address_markers[] = {
{ 0,"Start of kernel VM" },
+#ifdef MODULES_VADDR
+   { 0,"modules start" },
+   { 0,"modules end" },
+#endif
{ 0,"vmalloc() Area" },
{ 0,"vmalloc() End" },
 #ifdef CONFIG_PPC64
@@ -352,6 +356,10 @@ static void populate_markers(void)
address_markers[i++].start_address = PAGE_OFFSET;
 #else
address_markers[i++].start_address = TASK_SIZE;
+#endif
+#ifdef MODULES_VADDR
+   address_markers[i++].start_address = MODULES_VADDR;
+   address_markers[i++].start_address = MODULES_END;
 #endif
address_markers[i++].start_address = VMALLOC_START;
address_markers[i++].start_address = VMALLOC_END;
-- 
2.25.0



[PATCH v2 1/6] powerpc/lib: Prepare code-patching for modules allocated outside vmalloc space

2020-06-29 Thread Christophe Leroy
Use is_vmalloc_or_module_addr() instead of is_vmalloc_addr()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/code-patching.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 0a051dfeb177..8c3934ea6220 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -93,7 +93,7 @@ static int map_patch_area(void *addr, unsigned long text_poke_addr)
unsigned long pfn;
int err;
 
-   if (is_vmalloc_addr(addr))
+   if (is_vmalloc_or_module_addr(addr))
pfn = vmalloc_to_pfn(addr);
else
pfn = __pa_symbol(addr) >> PAGE_SHIFT;
-- 
2.25.0



[PATCH v2 2/6] powerpc: Use MODULES_VADDR if defined

2020-06-29 Thread Christophe Leroy
In order to allow allocation of modules outside of vmalloc space,
use MODULES_VADDR and MODULES_END when MODULES_VADDR is defined.

Redefine module_alloc() when MODULES_VADDR is defined, and
unmap the corresponding KASAN shadow memory.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/module.c  | 11 +++
 arch/powerpc/mm/kasan/kasan_init_32.c |  6 ++
 2 files changed, 17 insertions(+)

diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index df649acb5631..a211b0253cdb 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -86,3 +86,14 @@ int module_finalize(const Elf_Ehdr *hdr,
 
return 0;
 }
+
+#ifdef MODULES_VADDR
+void *module_alloc(unsigned long size)
+{
+   BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+
+   return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
+			       PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+			       __builtin_return_address(0));
+}
+#endif
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 0760e1e754e4..f1bc267d42af 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -115,6 +115,12 @@ static void __init kasan_unmap_early_shadow_vmalloc(void)
unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
 
kasan_update_early_region(k_start, k_end, __pte(0));
+
+#ifdef MODULES_VADDR
+   k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
+   k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
+   kasan_update_early_region(k_start, k_end, __pte(0));
+#endif
 }
 
 static void __init kasan_mmu_init(void)
-- 
2.25.0



[PATCH v2 3/6] powerpc/32s: Only leave NX unset on segments used for modules

2020-06-29 Thread Christophe Leroy
Instead of leaving NX unset on all segments above the start
of vmalloc space, only leave NX unset on segments used for
modules.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s32/mmu.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 03b6ba54460e..c0162911f6cb 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -187,6 +187,17 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return __mmu_mapin_ram(border, top);
 }
 
+static bool is_module_segment(unsigned long addr)
+{
+   if (!IS_ENABLED(CONFIG_MODULES))
+   return false;
+   if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
+   return false;
+   if (addr >= ALIGN(VMALLOC_END, SZ_256M))
+   return false;
+   return true;
+}
+
 void mmu_mark_initmem_nx(void)
 {
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -223,9 +234,9 @@ void mmu_mark_initmem_nx(void)
 
for (i = TASK_SIZE >> 28; i < 16; i++) {
/* Do not set NX on VM space for modules */
-   if (IS_ENABLED(CONFIG_MODULES) &&
-   (VMALLOC_START & 0xf000) == i << 28)
-   break;
+   if (is_module_segment(i << 28))
+   continue;
+
mtsrin(mfsrin(i << 28) | 0x1000, i << 28);
}
 }
-- 
2.25.0



[PATCH v2 4/6] powerpc/32: Set user/kernel boundary at TASK_SIZE instead of PAGE_OFFSET

2020-06-29 Thread Christophe Leroy
User space stops at TASK_SIZE. At the moment, kernel space starts
at PAGE_OFFSET.

In order to use the space between TASK_SIZE and PAGE_OFFSET for modules,
make TASK_SIZE the boundary between user and kernel space.

Note that fault.c already considers TASK_SIZE as the boundary between
user and kernel space.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/page.h | 4 +++-
 arch/powerpc/mm/ptdump/ptdump.c | 8 ++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index a63fe6f3a0ff..254687258f42 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -255,8 +255,10 @@ static inline bool pfn_valid(unsigned long pfn)
  */
 #ifdef CONFIG_PPC_BOOK3E_64
 #define is_kernel_addr(x)  ((x) >= 0x8000000000000000ul)
-#else
+#elif defined(CONFIG_PPC_BOOK3S_64)
 #define is_kernel_addr(x)  ((x) >= PAGE_OFFSET)
+#else
+#define is_kernel_addr(x)  ((x) >= TASK_SIZE)
 #endif
 
 #ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index de6e05ef871c..9d942136c7be 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -348,7 +348,11 @@ static void populate_markers(void)
 {
int i = 0;
 
+#ifdef CONFIG_PPC64
address_markers[i++].start_address = PAGE_OFFSET;
+#else
+   address_markers[i++].start_address = TASK_SIZE;
+#endif
address_markers[i++].start_address = VMALLOC_START;
address_markers[i++].start_address = VMALLOC_END;
 #ifdef CONFIG_PPC64
@@ -385,7 +389,7 @@ static int ptdump_show(struct seq_file *m, void *v)
struct pg_state st = {
.seq = m,
.marker = address_markers,
-   .start_address = PAGE_OFFSET,
+   .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};
 
 #ifdef CONFIG_PPC64
@@ -429,7 +433,7 @@ void ptdump_check_wx(void)
.seq = NULL,
.marker = address_markers,
.check_wx = true,
-   .start_address = PAGE_OFFSET,
+   .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};
 
 #ifdef CONFIG_PPC64
-- 
2.25.0



[PATCH v2 0/6] powerpc/32s: Allocate modules outside of vmalloc space for STRICT_KERNEL_RWX

2020-06-29 Thread Christophe Leroy
On book3s32 (hash), exec protection is set per 256MB segment with the NX bit.
Instead of clearing the NX bit on vmalloc space when CONFIG_MODULES is selected,
allocate modules in a dedicated segment (0xb0000000-0xbfffffff by default).
This allows keeping exec protection on vmalloc space while still allowing exec
for modules.
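
As a hedged aside (not from the series itself): on the hash MMU the segment of
an effective address is just its top four bits, which is why NX can stay set on
the vmalloc segments while being left clear on a module-only segment:

static inline unsigned int seg_of(unsigned long ea)
{
	/* 0xb0000000-0xbfffffff is segment 0xb; vmalloc sits in the
	 * segments at and above 0xc0000000 */
	return ea >> 28;
}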

v2:
- Removed the two patches that fix ptdump. Will submitted independently
- Only changing the user/kernel boundary for PPC32 now.
- Reordered the patches inside the series.

Christophe Leroy (6):
  powerpc/lib: Prepare code-patching for modules allocated outside
vmalloc space
  powerpc: Use MODULES_VADDR if defined
  powerpc/32s: Only leave NX unset on segments used for modules
  powerpc/32: Set user/kernel boundary at TASK_SIZE instead of
PAGE_OFFSET
  powerpc/32s: Kernel space starts at TASK_SIZE
  powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX

 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h | 15 +--
 arch/powerpc/include/asm/page.h  |  4 +++-
 arch/powerpc/kernel/head_32.S| 12 ++--
 arch/powerpc/kernel/module.c | 11 +++
 arch/powerpc/lib/code-patching.c |  2 +-
 arch/powerpc/mm/book3s32/hash_low.S  |  2 +-
 arch/powerpc/mm/book3s32/mmu.c   | 17 ++---
 arch/powerpc/mm/kasan/kasan_init_32.c|  6 ++
 arch/powerpc/mm/ptdump/ptdump.c  | 16 ++--
 10 files changed, 62 insertions(+), 24 deletions(-)

-- 
2.25.0



Re: [PATCH 13/13] powerpc/dma: Remove dev->archdata.iommu_domain

2020-06-29 Thread Michael Ellerman
Joerg Roedel  writes:
> From: Joerg Roedel 
>
> There are no users left, so remove the pointer and save some memory.
>
> Signed-off-by: Joerg Roedel 
> ---
>  arch/powerpc/include/asm/device.h | 3 ---
>  1 file changed, 3 deletions(-)

It's a little hard to confirm there are no users left just with grep,
but I think you've got them all, and the compiler should tell us if
you've missed any.

Acked-by: Michael Ellerman  (powerpc)

cheers

> diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
> index 266542769e4b..1bc595213338 100644
> --- a/arch/powerpc/include/asm/device.h
> +++ b/arch/powerpc/include/asm/device.h
> @@ -34,9 +34,6 @@ struct dev_archdata {
>   struct iommu_table  *iommu_table_base;
>  #endif
>  
> -#ifdef CONFIG_IOMMU_API
> - void*iommu_domain;
> -#endif
>  #ifdef CONFIG_PPC64
>   struct pci_dn   *pci_data;
>  #endif
> -- 
> 2.27.0


Re: [PATCH] powerpc: Warn about use of smt_snooze_delay

2020-06-29 Thread Gautham R Shenoy
On Thu, Jun 25, 2020 at 07:33:49PM +0930, Joel Stanley wrote:
> It's not done anything for a long time. Save the percpu variable, and
> emit a warning to remind users to not expect it to do anything.
> 
> Signed-off-by: Joel Stanley 

The only known user of "smt_snooze_delay" is the "ppc64_cpu" utility, which
uses the presence of this file to infer that the system is SMT
capable.

Since we have "/sys/devices/system/cpu/smt/" these days, perhaps the
userspace utility can use that and we can get rid of the file
altogether ?
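
A hypothetical userspace-side probe of that directory (illustration only,
not something ppc64_cpu does today):

#include <unistd.h>

static int system_is_smt_capable(void)
{
	/* present whenever the kernel exposes generic SMT control */
	return access("/sys/devices/system/cpu/smt/control", F_OK) == 0;
}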

FWIW,
Acked-by: Gautham R. Shenoy 
> ---
>  arch/powerpc/kernel/sysfs.c | 41 +
>  1 file changed, 14 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 571b3259697e..530ae92bc46d 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -32,29 +32,25 @@
>  
>  static DEFINE_PER_CPU(struct cpu, cpu_devices);
>  
> -/*
> - * SMT snooze delay stuff, 64-bit only for now
> - */
> -
>  #ifdef CONFIG_PPC64
>  
> -/* Time in microseconds we delay before sleeping in the idle loop */
> -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
> +/*
> + * Snooze delay has not been hooked up since 3fa8cad82b94 
> ("powerpc/pseries/cpuidle:
> + * smt-snooze-delay cleanup.") and has been broken even longer. As was 
> foretold in
> + * 2014:
> + *
> + *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to 
> clean
> + *  up the kernel code."
> + *
> + * At some point in the future this code should be removed.
> + */
>  
>  static ssize_t store_smt_snooze_delay(struct device *dev,
> struct device_attribute *attr,
> const char *buf,
> size_t count)
>  {
> - struct cpu *cpu = container_of(dev, struct cpu, dev);
> - ssize_t ret;
> - long snooze;
> -
> - ret = sscanf(buf, "%ld", &snooze);
> - if (ret != 1)
> - return -EINVAL;
> -
> - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
> + WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");
>   return count;
>  }
>  
> @@ -62,9 +58,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
>struct device_attribute *attr,
>char *buf)
>  {
> - struct cpu *cpu = container_of(dev, struct cpu, dev);
> + WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");
>  
> - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
> + return sprintf(buf, "100\n");
>  }
>  
>  static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
> @@ -72,16 +68,7 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
>  
>  static int __init setup_smt_snooze_delay(char *str)
>  {
> - unsigned int cpu;
> - long snooze;
> -
> - if (!cpu_has_feature(CPU_FTR_SMT))
> - return 1;
> -
> - snooze = simple_strtol(str, NULL, 10);
> - for_each_possible_cpu(cpu)
> - per_cpu(smt_snooze_delay, cpu) = snooze;
> -
> + WARN_ON_ONCE("smt-snooze-delay command line option has no effect\n");
>   return 1;
>  }
>  __setup("smt-snooze-delay=", setup_smt_snooze_delay);
> -- 
> 2.27.0
> 


[PATCH 1/3] powerpc/cacheinfo: Use cpumap_print to print cpumap

2020-06-29 Thread Srikar Dronamraju
Tejun Heo had modified shared_cpu_map_show to use scnprintf instead of
cpumap_print while adding support for the *pb[l] format.
Refer commit 0c118b7bd09a ("powerpc: use %*pb[l] to print bitmaps including
cpumasks and nodemasks")

cpumap_print_to_pagebuf is a standard function to print cpumap.  With
commit 9cf79d115f0d ("bitmap: remove explicit newline handling using
scnprintf format string"), there is no need to print explicit newline and
trailing null character. cpumap_print_to_pagebuf internally uses
scnprintf. Hence replace scnprintf with cpumap_print_to_pagebuf.

Note: shared_cpu_map_show in drivers/base/cacheinfo.c already uses
cpumap_print_to_pagebuf.
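
For reference, the helper's signature (as declared in include/linux/cpumask.h;
it returns the number of characters written, trailing newline included):

	ssize_t cpumap_print_to_pagebuf(bool list, char *buf,
					const struct cpumask *mask);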

Before this patch
# cat /sys/devices/system/cpu0/cache/index1/shared_cpu_map
00ff

#
(Notice the extra blank line).

After this patch
# cat /sys/devices/system/cpu0/cache/index1/shared_cpu_map
00ff
#

Cc: Nathan Lynch 
Cc: Tejun Heo 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/kernel/cacheinfo.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 470336277c67..0d3c45e2fccd 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -652,7 +652,7 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att
struct cache_index_dir *index;
struct cache *cache;
const struct cpumask *mask;
-   int ret, cpu;
+   int cpu;
 
index = kobj_to_cache_index_dir(k);
cache = index->cache;
@@ -664,11 +664,7 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att
mask  = &cache->shared_cpu_map;
}
 
-   ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
-   cpumask_pr_args(mask));
-   buf[ret++] = '\n';
-   buf[ret] = '\0';
-   return ret;
+   return cpumap_print_to_pagebuf(false, buf, mask);
 }
 
 static struct kobj_attribute cache_shared_cpu_map_attr =
-- 
2.17.1



[PATCH 3/3] powerpc/cacheinfo: Add per cpu per index shared_cpu_list

2020-06-29 Thread Srikar Dronamraju
Unlike drivers/base/cacheinfo, powerpc cacheinfo code is not exposing
shared_cpu_list under /sys/devices/system/cpu/cpu/cache/index

Add shared_cpu_list to per cpu per index directory to maintain parity
with x86.  Some scripts (example: mmtests
https://github.com/gormanm/mmtests) seem to be looking for
shared_cpu_list instead of shared_cpu_map.

Before this patch
# ls /sys/devices/system/cpu0/cache/index1
coherency_line_size  number_of_sets  size  ways_of_associativity
level  shared_cpu_map  type
# cat /sys/devices/system/cpu0/cache/index1/shared_cpu_map
00ff
#

After this patch
# ls /sys/devices/system/cpu0/cache/index1
coherency_line_size  number_of_sets   shared_cpu_map  type
level  shared_cpu_list  size  ways_of_associativity
# cat /sys/devices/system/cpu0/cache/index1/shared_cpu_map
00ff
# cat /sys/devices/system/cpu0/cache/index1/shared_cpu_list
0-7
#

Cc: Nathan Lynch 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/kernel/cacheinfo.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 5be870f99623..d8d4552af30a 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -670,12 +670,20 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo
 
 static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
 {
-   return show_shared_cpumap(k, attr, buf, false)
+   return show_shared_cpumap(k, attr, buf, false);
+}
+
+static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+   return show_shared_cpumap(k, attr, buf, true);
 }
 
 static struct kobj_attribute cache_shared_cpu_map_attr =
__ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
 
+static struct kobj_attribute cache_shared_cpu_list_attr =
+   __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
 /* Attributes which should always be created -- the kobject/sysfs core
  * does this automatically via kobj_type->default_attrs.  This is the
  * minimum data required to uniquely identify a cache.
@@ -684,6 +692,7 @@ static struct attribute *cache_index_default_attrs[] = {
&cache_type_attr.attr,
&cache_level_attr.attr,
&cache_shared_cpu_map_attr.attr,
+   &cache_shared_cpu_list_attr.attr,
NULL,
 };
 
-- 
2.17.1



[PATCH 2/3] powerpc/cacheinfo: Make cpumap_show code reusable

2020-06-29 Thread Srikar Dronamraju
In anticipation of implementing shared_cpu_list, move code under
shared_cpu_map_show to a common function.

No functional changes.

Cc: Nathan Lynch 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/kernel/cacheinfo.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 0d3c45e2fccd..5be870f99623 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -647,7 +647,8 @@ static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *
return &cache->shared_cpu_map;
 }
 
-static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+static ssize_t
+show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
 {
struct cache_index_dir *index;
struct cache *cache;
@@ -664,7 +665,12 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att
mask  = &cache->shared_cpu_map;
}
 
-   return cpumap_print_to_pagebuf(false, buf, mask);
+   return cpumap_print_to_pagebuf(list, buf, mask);
+}
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+   return show_shared_cpumap(k, attr, buf, false)
 }
 
 static struct kobj_attribute cache_shared_cpu_map_attr =
-- 
2.17.1


