[PATCH v5 5/5] powerpc/sstep: Add testcases for VSX vector paired load/store instructions
From: Balamuruhan S Add testcases for VSX vector paired load/store instructions. Sample o/p: emulate_step_test: lxvp : PASS emulate_step_test: stxvp : PASS emulate_step_test: lxvpx : PASS emulate_step_test: stxvpx : PASS emulate_step_test: plxvp : PASS emulate_step_test: pstxvp : PASS Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria --- arch/powerpc/lib/test_emulate_step.c | 270 +++ 1 file changed, 270 insertions(+) diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 0a201b771477..783d1b85ecfe 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -612,6 +612,273 @@ static void __init test_lxvd2x_stxvd2x(void) } #endif /* CONFIG_VSX */ +#ifdef CONFIG_VSX +static void __init test_lxvp_stxvp(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[4] = (unsigned long)&c[0].a; + + /* +* lxvp XTp,DQ(RA) +* XTp = 32xTX + 2xTp +* let TX=1 Tp=1 RA=4 DQ=0 +*/ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVP(34, 4, 0))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvp", "FAIL"); + } + + /*** stxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* +* stxvp 
XSp,DQ(RA) +* XSp = 32xSX + 2xSp +* let SX=1 Sp=1 RA=4 DQ=0 +*/ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVP(34, 4, 0))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvp", "FAIL"); + } +} +#else +static void __init test_lxvp_stxvp(void) +{ + show_result("lxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvpx_stxvpx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvpx ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[3] = (unsigned long)&c[0].a; + regs.gpr[4] = 0; + + /* +* lxvpx XTp,RA,RB +* XTp = 32xTX + 2xTp +* let TX=1 Tp=1 RA=3 RB=4 +*/ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVPX(34, 3, 4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvpx", "FAIL"); + } + + /*** stxvpx ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 3742
[PATCH v5 4/5] powerpc/ppc-opcode: Add encoding macros for VSX vector paired instructions
From: Balamuruhan S Add instruction encodings, DQ, D0, D1 immediate, XTP, XSP operands as macros for new VSX vector paired instructions, * Load VSX Vector Paired (lxvp) * Load VSX Vector Paired Indexed (lxvpx) * Prefixed Load VSX Vector Paired (plxvp) * Store VSX Vector Paired (stxvp) * Store VSX Vector Paired Indexed (stxvpx) * Prefixed Store VSX Vector Paired (pstxvp) Suggested-by: Naveen N. Rao Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria --- arch/powerpc/include/asm/ppc-opcode.h | 13 + 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index a6e3700c4566..5e7918ca4fb7 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -78,6 +78,9 @@ #define IMM_L(i) ((uintptr_t)(i) & 0x) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) +#define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0) +#define IMM_D0(i) (((uintptr_t)(i) >> 16) & 0x3) +#define IMM_D1(i) IMM_L(i) /* * 16-bit immediate helper macros: HA() is for use with sign-extending instrs @@ -295,6 +298,8 @@ #define __PPC_XB(b)b) & 0x1f) << 11) | (((b) & 0x20) >> 4)) #define __PPC_XS(s)s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) #define __PPC_XT(s)__PPC_XS(s) +#define __PPC_XSP(s) s) & 0x1e) | (((s) >> 5) & 0x1)) << 21) +#define __PPC_XTP(s) __PPC_XSP(s) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) #define __PPC_WC(w)(((w) & 0x3) << 21) #define __PPC_WS(w)(((w) & 0x1f) << 11) @@ -395,6 +400,14 @@ #define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf780 | VSX_XX3((t), (a), (b #define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \ ((0x102d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) +#define PPC_RAW_LXVP(xtp, a, i)(0x1800 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_STXVP(xsp, a, i) (0x1801 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | 
__PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_PLXVP(xtp, i, a, pr) \ + ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe800 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))) +#define PPC_RAW_PSTXVP(xsp, i, a, pr) \ + ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf800 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))) #define PPC_RAW_NAP(0x4c000364) #define PPC_RAW_SLEEP (0x4c0003a4) #define PPC_RAW_WINKLE (0x4c0003e4) -- 2.26.2
[PATCH v5 3/5] powerpc/sstep: Support VSX vector paired storage access instructions
From: Balamuruhan S VSX Vector Paired instructions loads/stores an octword (32 bytes) from/to storage into two sequential VSRs. Add emulation support for these new instructions: * Load VSX Vector Paired (lxvp) * Load VSX Vector Paired Indexed (lxvpx) * Prefixed Load VSX Vector Paired (plxvp) * Store VSX Vector Paired (stxvp) * Store VSX Vector Paired Indexed (stxvpx) * Prefixed Store VSX Vector Paired (pstxvp) Suggested-by: Naveen N. Rao Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria [kernel test robot reported a build failure] Reported-by: kernel test robot --- arch/powerpc/lib/sstep.c | 150 +-- 1 file changed, 129 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index faf0bbf3efb7..96ca813a65e7 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -32,6 +32,10 @@ extern char system_call_vectored_emulate[]; #define XER_OV32 0x0008U #define XER_CA32 0x0004U +#ifdef CONFIG_VSX +#define VSX_REGISTER_XTP(rd) rd) & 1) << 5) | ((rd) & 0xfe)) +#endif + #ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S @@ -279,6 +283,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) up[1] = tmp; break; } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + #endif default: WARN_ON_ONCE(1); @@ -709,6 +726,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, reg->d[0] = reg->d[1] = 0; switch (op->element_size) { + case 32: + /* [p]lxvp[x] */ case 16: /* whole vector; lxv[x] or lxvl[l] */ if (size == 0) @@ -717,7 +736,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) rev = !rev; if (rev) - do_byte_reverse(reg, 16); + do_byte_reverse(reg, size); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ @@ -793,6 +812,20 @@ void 
emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, size = GETSIZE(op->type); switch (op->element_size) { + case 32: + /* [p]stxvp[x] */ + if (size == 0) + break; + if (rev) { + /* reverse 32 bytes */ + buf.d[0] = byterev_8(reg->d[3]); + buf.d[1] = byterev_8(reg->d[2]); + buf.d[2] = byterev_8(reg->d[1]); + buf.d[3] = byterev_8(reg->d[0]); + reg = &buf; + } + memcpy(mem, reg, size); + break; case 16: /* stxv, stxvx, stxvl, stxvll */ if (size == 0) @@ -861,28 +894,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem, cross_endian); + nr_vsx_regs = size / sizeof(__vector128); + emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } } else { - current->thread.fp_state.fpr[reg][0] = buf.d[0]; - current->thread.fp_state.fpr[reg][1] = buf.d[1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0]; + current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1]; + } } } else { - if (regs->msr & MSR_VEC) - load_vsrn(reg, &buf); - else - current->thread.vr_state.vr[reg - 32] = buf
[PATCH v5 2/5] powerpc/sstep: Cover new VSX instructions under CONFIG_VSX
Recently added Power10 prefixed VSX instruction are included unconditionally in the kernel. If they are executed on a machine without VSX support, it might create issues. Fix that. Also fix one mnemonics spelling mistake in comment. Fixes: 50b80a12e4cc ("powerpc sstep: Add support for prefixed load/stores") Signed-off-by: Ravi Bangoria --- arch/powerpc/lib/sstep.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e6242744c71b..faf0bbf3efb7 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -2757,6 +2757,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 41:/* plwa */ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); break; +#ifdef CONFIG_VSX case 42:/* plxsd */ op->reg = rd + 32; op->type = MKOP(LOAD_VSX, PREFIXED, 8); @@ -2797,13 +2798,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; break; +#endif /* CONFIG_VSX */ case 56:/* plq */ op->type = MKOP(LOAD, PREFIXED, 16); break; case 57:/* pld */ op->type = MKOP(LOAD, PREFIXED, 8); break; - case 60:/* stq */ + case 60:/* pstq */ op->type = MKOP(STORE, PREFIXED, 16); break; case 61:/* pstd */ -- 2.26.2
[PATCH v5 1/5] powerpc/sstep: Emulate prefixed instructions only when CPU_FTR_ARCH_31 is set
From: Balamuruhan S Unconditional emulation of prefixed instructions will allow emulation of them on Power10 predecessors which might cause issues. Restrict that. Fixes: 3920742b92f5 ("powerpc sstep: Add support for prefixed fixed-point arithmetic") Fixes: 50b80a12e4cc ("powerpc sstep: Add support for prefixed load/stores") Signed-off-by: Balamuruhan S Signed-off-by: Ravi Bangoria --- arch/powerpc/lib/sstep.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e9dcaba9a4f8..e6242744c71b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1346,6 +1346,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, switch (opcode) { #ifdef __powerpc64__ case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); rd = (suffix >> 21) & 0x1f; @@ -2733,6 +2736,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } break; case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); op->update_reg = ra; -- 2.26.2
[PATCH v5 0/5] powerpc/sstep: VSX 32-byte vector paired load/store instructions
VSX vector paired instructions operates with octword (32-byte) operand for loads and stores between storage and a pair of two sequential Vector-Scalar Registers (VSRs). There are 4 word instructions and 2 prefixed instructions that provides this 32-byte storage access operations - lxvp, lxvpx, stxvp, stxvpx, plxvp, pstxvp. Emulation infrastructure doesn't have support for these instructions, to operate with 32-byte storage access and to operate with 2 VSX registers. This patch series enables the instruction emulation support and adds test cases for them respectively. v4: https://lore.kernel.org/r/20201008072726.233086-1-ravi.bango...@linux.ibm.com Changes in v5: - * Fix build breakage reported by Kernel test robote * Patch #2 is new. CONFIG_VSX check was missing for some VSX instructions. Patch #2 adds that check. Changes in v4: - * Patch #1 is (kind of) new. * Patch #2 now enables both analyse_instr() and emulate_step() unlike prev series where both were in separate patches. * Patch #2 also has important fix for emulation on LE. * Patch #3 and #4. Added XSP/XTP, D0/D1 instruction operands, removed *_EX_OP, __PPC_T[P][X] macros which are incorrect, and adhered to PPC_RAW_* convention. * Added `CPU_FTR_ARCH_31` check in testcases to avoid failing in p8/p9. * Some consmetic changes. * Rebased to powerpc/next Changes in v3: - Worked on review comments and suggestions from Ravi and Naveen, * Fix the do_vsx_load() to handle vsx instructions if MSR_FP/MSR_VEC cleared in exception conditions and it reaches to read/write to thread_struct member fp_state/vr_state respectively. * Fix wrongly used `__vector128 v[2]` in struct vsx_reg as it should hold a single vsx register size. * Remove unnecessary `VSX_CHECK_VEC` flag set and condition to check `VSX_LDLEFT` that is not applicable for these vsx instructions. * Fix comments in emulate_vsx_load() that were misleading. * Rebased on latest powerpc next branch. 
Changes in v2: - * Fix suggestion from Sandipan, wrap ISA 3.1 instructions with cpu_has_feature(CPU_FTR_ARCH_31) check. Balamuruhan S (4): powerpc/sstep: Emulate prefixed instructions only when CPU_FTR_ARCH_31 is set powerpc/sstep: Support VSX vector paired storage access instructions powerpc/ppc-opcode: Add encoding macros for VSX vector paired instructions powerpc/sstep: Add testcases for VSX vector paired load/store instructions Ravi Bangoria (1): powerpc/sstep: Cover new VSX instructions under CONFIG_VSX arch/powerpc/include/asm/ppc-opcode.h | 13 ++ arch/powerpc/lib/sstep.c | 160 --- arch/powerpc/lib/test_emulate_step.c | 270 ++ 3 files changed, 421 insertions(+), 22 deletions(-) -- 2.26.2
Re: [PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()
On Sat, Oct 10, 2020 at 12:03:49AM +0200, Daniel Vetter wrote: > On Fri, Oct 09, 2020 at 12:49:44PM -0700, ira.we...@intel.com wrote: > > From: Ira Weiny > > > > These kmap() calls in the gpu stack are localized to a single thread. > > To avoid the over head of global PKRS updates use the new kmap_thread() > > call. > > > > Cc: David Airlie > > Cc: Daniel Vetter > > Cc: Patrik Jakobsson > > Signed-off-by: Ira Weiny > > I'm guessing the entire pile goes in through some other tree. > Apologies for not realizing there were multiple maintainers here. But, I was thinking it would land together through the mm tree once the core support lands. I've tried to split these out in a way they can be easily reviewed/acked by the correct developers. > If so: > > Acked-by: Daniel Vetter > > If you want this to land through maintainer trees, then we need a > per-driver split (since aside from amdgpu and radeon they're all different > subtrees). It is just RFC for the moment. I need to get the core support accepted first then this can land. > > btw the two kmap calls in drm you highlight in the cover letter should > also be convertible to kmap_thread. We only hold vmalloc mappings for a > longer time (or it'd be quite a driver bug). So if you want maybe throw > those two as two additional patches on top, and we can do some careful > review & testing for them. Cool. I'll add them in. 
Ira > -Daniel > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 12 ++-- > > drivers/gpu/drm/gma500/gma_display.c | 4 ++-- > > drivers/gpu/drm/gma500/mmu.c | 10 +- > > drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 4 ++-- > > .../gpu/drm/i915/gem/selftests/i915_gem_context.c| 4 ++-- > > drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 > > drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 4 ++-- > > drivers/gpu/drm/i915/gt/intel_gtt.c | 4 ++-- > > drivers/gpu/drm/i915/gt/shmem_utils.c| 4 ++-- > > drivers/gpu/drm/i915/i915_gem.c | 8 > > drivers/gpu/drm/i915/i915_gpu_error.c| 4 ++-- > > drivers/gpu/drm/i915/selftests/i915_perf.c | 4 ++-- > > drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- > > 13 files changed, 37 insertions(+), 37 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > index 978bae731398..bd564bccb7a3 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > @@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, > > char __user *buf, > > > > page = adev->gart.pages[p]; > > if (page) { > > - ptr = kmap(page); > > + ptr = kmap_thread(page); > > ptr += off; > > > > r = copy_to_user(buf, ptr, cur_size); > > - kunmap(adev->gart.pages[p]); > > + kunmap_thread(adev->gart.pages[p]); > > } else > > r = clear_user(buf, cur_size); > > > > @@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char > > __user *buf, > > if (p->mapping != adev->mman.bdev.dev_mapping) > > return -EPERM; > > > > - ptr = kmap(p); > > + ptr = kmap_thread(p); > > r = copy_to_user(buf, ptr + off, bytes); > > - kunmap(p); > > + kunmap_thread(p); > > if (r) > > return -EFAULT; > > > > @@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, > > const char __user *buf, > > if (p->mapping != adev->mman.bdev.dev_mapping) > > return -EPERM; > > > > - ptr = kmap(p); > > + ptr = kmap_thread(p); > > r = copy_from_user(ptr + 
off, buf, bytes); > > - kunmap(p); > > + kunmap_thread(p); > > if (r) > > return -EFAULT; > > > > diff --git a/drivers/gpu/drm/gma500/gma_display.c > > b/drivers/gpu/drm/gma500/gma_display.c > > index 3df6d6e850f5..35f4e55c941f 100644 > > --- a/drivers/gpu/drm/gma500/gma_display.c > > +++ b/drivers/gpu/drm/gma500/gma_display.c > > @@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, > > /* Copy the cursor to cursor mem */ > > tmp_dst = dev_priv->vram_addr + cursor_gt->offset; > > for (i = 0; i < cursor_pages; i++) { > > - tmp_src = kmap(gt->pages[i]); > > + tmp_src = kmap_thread(gt->pages[i]); > > memcpy(tmp_dst, tmp_src, PAGE_SIZE); > > - kunmap(gt->pages[i]); > > + kunmap_thread(gt->pages[i]); > > tmp_dst += PAGE_SIZE; > > } > > > > diff --git a/d
Re: [PATCH RFC PKS/PMEM 10/58] drivers/rdma: Utilize new kmap_thread()
-ira.we...@intel.com wrote: - >To: "Andrew Morton" , "Thomas Gleixner" >, "Ingo Molnar" , "Borislav >Petkov" , "Andy Lutomirski" , "Peter >Zijlstra" >From: ira.we...@intel.com >Date: 10/09/2020 09:52PM >Cc: "Ira Weiny" , "Mike Marciniszyn" >, "Dennis Dalessandro" >, "Doug Ledford" , >"Jason Gunthorpe" , "Faisal Latif" >, "Shiraz Saleem" , >"Bernard Metzler" , x...@kernel.org, "Dave Hansen" >, "Dan Williams" >, "Fenghua Yu" , >linux-...@vger.kernel.org, linux-ker...@vger.kernel.org, >linux-nvd...@lists.01.org, linux-fsde...@vger.kernel.org, >linux...@kvack.org, linux-kselft...@vger.kernel.org, >linuxppc-dev@lists.ozlabs.org, k...@vger.kernel.org, >net...@vger.kernel.org, b...@vger.kernel.org, >ke...@lists.infradead.org, linux-bca...@vger.kernel.org, >linux-...@lists.infradead.org, de...@driverdev.osuosl.org, >linux-...@vger.kernel.org, linux-...@vger.kernel.org, >linux-s...@vger.kernel.org, target-de...@vger.kernel.org, >linux-...@vger.kernel.org, ceph-de...@vger.kernel.org, >linux-e...@vger.kernel.org, linux-...@kvack.org, >io-ur...@vger.kernel.org, linux-er...@lists.ozlabs.org, >linux...@lists.infradead.org, linux-ntfs-...@lists.sourceforge.net, >reiserfs-de...@vger.kernel.org, >linux-f2fs-de...@lists.sourceforge.net, linux-ni...@vger.kernel.org, >cluster-de...@redhat.com, ecryp...@vger.kernel.org, >linux-c...@vger.kernel.org, linux-bt...@vger.kernel.org, >linux-...@lists.infradead.org, linux-r...@vger.kernel.org, >amd-...@lists.freedesktop.org, dri-de...@lists.freedesktop.org, >intel-...@lists.freedesktop.org, drbd-...@tron.linbit.com, >linux-bl...@vger.kernel.org, xen-de...@lists.xenproject.org, >linux-cach...@redhat.com, samba-techni...@lists.samba.org, >intel-wired-...@lists.osuosl.org >Subject: [EXTERNAL] [PATCH RFC PKS/PMEM 10/58] drivers/rdma: Utilize >new kmap_thread() > >From: Ira Weiny > >The kmap() calls in these drivers are localized to a single thread. >To >avoid the over head of global PKRS updates use the new kmap_thread() >call. 
> >Cc: Mike Marciniszyn >Cc: Dennis Dalessandro >Cc: Doug Ledford >Cc: Jason Gunthorpe >Cc: Faisal Latif >Cc: Shiraz Saleem >Cc: Bernard Metzler >Signed-off-by: Ira Weiny >--- > drivers/infiniband/hw/hfi1/sdma.c | 4 ++-- > drivers/infiniband/hw/i40iw/i40iw_cm.c | 10 +- > drivers/infiniband/sw/siw/siw_qp_tx.c | 14 +++--- > 3 files changed, 14 insertions(+), 14 deletions(-) > >diff --git a/drivers/infiniband/hw/hfi1/sdma.c >b/drivers/infiniband/hw/hfi1/sdma.c >index 04575c9afd61..09d206e3229a 100644 >--- a/drivers/infiniband/hw/hfi1/sdma.c >+++ b/drivers/infiniband/hw/hfi1/sdma.c >@@ -3130,7 +3130,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata >*dd, struct sdma_txreq *tx, > } > > if (type == SDMA_MAP_PAGE) { >- kvaddr = kmap(page); >+ kvaddr = kmap_thread(page); > kvaddr += offset; > } else if (WARN_ON(!kvaddr)) { > __sdma_txclean(dd, tx); >@@ -3140,7 +3140,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata >*dd, struct sdma_txreq *tx, > memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len); > tx->coalesce_idx += len; > if (type == SDMA_MAP_PAGE) >- kunmap(page); >+ kunmap_thread(page); > > /* If there is more data, return */ > if (tx->tlen - tx->coalesce_idx) >diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c >b/drivers/infiniband/hw/i40iw/i40iw_cm.c >index a3b95805c154..122d7a5642a1 100644 >--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c >+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c >@@ -3721,7 +3721,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct >iw_cm_conn_param *conn_param) > ibmr->device = iwpd->ibpd.device; > iwqp->lsmm_mr = ibmr; > if (iwqp->page) >- iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); >+ iwqp->sc_qp.qp_uk.sq_base = kmap_thread(iwqp->page); > dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, > iwqp->ietf_mem.va, > (accept.size + > conn_param->private_data_len), >@@ -3729,12 +3729,12 @@ int i40iw_accept(struct iw_cm_id *cm_id, >struct iw_cm_conn_param *conn_param) > > } else { > if (iwqp->page) >- iwqp->sc_qp.qp_uk.sq_base = 
kmap(iwqp->page); >+ iwqp->sc_qp.qp_uk.sq_base = kmap_thread(iwqp->page); > dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0); > } > > if (iwqp->page) >- kunmap(iwqp->page); >+ kunmap_thread(iwqp->page); > > iwqp->cm_id = cm_id; > cm_node->cm_id = cm_id; >@@ -4102,10 +4102,10 @@ static void i40iw_cm_event_connected(struct >i40iw_cm_event *event) > i40iw_cm_init_tsa_conn(iwqp, cm_node); >
[PATCH] powerpc/mm: Fix verification of MMU_FTR_TYPE_44x
MMU_FTR_TYPE_44x cannot be checked by cpu_has_feature() Use mmu_has_feature() instead Fixes: 23eb7f560a2a ("powerpc: Convert flush_icache_range & friends to C") Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ddc32cc1b6cf..b7586d8c835b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -530,7 +530,7 @@ void __flush_dcache_icache(void *p) * space occurs, before returning to user space. */ - if (cpu_has_feature(MMU_FTR_TYPE_44x)) + if (mmu_has_feature(MMU_FTR_TYPE_44x)) return; invalidate_icache_range(addr, addr + PAGE_SIZE); -- 2.25.0
[PATCH 4/4] powerpc/8xx: Manage _PAGE_ACCESSED through APG bits in L1 entry
When _PAGE_ACCESSED is not set, a minor fault is expected. To do this, TLB miss exception ANDs _PAGE_PRESENT and _PAGE_ACCESSED into the L2 entry valid bit. To simplify the processing and reduce the number of instructions in TLB miss exceptions, manage it as an APG bit and get it next to _PAGE_GUARDED bit to allow a copy in one go. Then declare the corresponding groups as handling all accesses as user accesses. As the PP bits always define user as No Access, it will generate a fault. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 2 +- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 47 +++- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 9 ++-- arch/powerpc/kernel/head_8xx.S | 36 +++ 4 files changed, 28 insertions(+), 66 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 85ed2390fb99..567cdc557402 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,7 +63,7 @@ static inline void restore_user_access(unsigned long flags) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf000), + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff00), "Bug: fault blocked by AP register !"); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 1d9ac0f9c794..0bd1b144eb76 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -33,19 +33,18 @@ * respectively NA for All or X for Supervisor and no access for User. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. 
lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MI_APG_INIT0x4000 - -/* - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MI_APG_KUEP0x6000 + * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say + * "all User" rules, that will lead to NA for all. + * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED + * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) + * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) + * 2 => User => 11 (all accesses performed according as user iaw page definition) + * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT + *=> 10 (all accesses performed according to swaped page definition) for KUEP + * 4-15 => Not Used + */ +#define MI_APG_INIT0xdc00 +#define MI_APG_KUEP0xde00 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -106,25 +105,9 @@ #define MD_Ks 0x8000 /* Should not be set */ #define MD_Kp 0x4000 /* Should always be set */ -/* - * All pages' PP data bits are set to either 000 or 011 or 001, which means - * respectively RW for Supervisor and no access for User, or RO for - * Supervisor and no access for user and NA for ALL. - * Then we use the APG to say whether accesses are according to Page rules or - * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. 
lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MD_APG_INIT0x4000 - -/* - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MD_APG_KUAP0x6000 +/* See explanation above at the definition of MI_APG_INIT */ +#define MD_APG_INIT0xdc00 +#define MD_APG_KUAP0xde00 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 66f403a7da44..1581204467e1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -39,9 +39,9 @@ * into the TLB. */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ -#define _PAGE_SPECIAL 0x0020 /* SW entry */ +#define _PAGE_ACCESSED 0x0020
[PATCH] powerpc/40x: Always fault when _PAGE_ACCESSED is not set
The kernel expects pte_young() to work regardless of CONFIG_SWAP. Make sure a minor fault is taken to set _PAGE_ACCESSED when it is not already set, regardless of the selection of CONFIG_SWAP. Fixes: 2c74e2586bb9 ("powerpc/40x: Rework 40x PTE access and TLB miss") Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_40x.S | 8 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 44c9018aed1b..a1ae00689e0f 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -284,11 +284,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29/* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r9, _PAGE_PRESENT -#endif andc. r9, r9, r11 /* Check permission */ bne 5f @@ -369,11 +365,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29/* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r9, _PAGE_PRESENT | _PAGE_EXEC -#endif andc. r9, r9, r11 /* Check permission */ bne 5f -- 2.25.0
[PATCH 3/4] powerpc/8xx: Always fault when _PAGE_ACCESSED is not set
The kernel expects pte_young() to work regardless of CONFIG_SWAP. Make sure a minor fault is taken to set _PAGE_ACCESSED when it is not already set, regardless of the selection of CONFIG_SWAP. This adds at least 3 instructions to the TLB miss exception handlers fast path. Following patch will reduce this overhead. Fixes: d069cb4373fe ("powerpc/8xx: Don't touch ACCESSED when no SWAP.") Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 10 -- 1 file changed, 10 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 83101c5888e3..6f3799a04121 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -202,9 +202,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -238,11 +236,9 @@ InstructionTLBMiss: rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MI_TWC, r11 #endif -#ifdef CONFIG_SWAP rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. 
* Software indicator bits 22, 24, 25, 26, and 27 must be @@ -256,9 +252,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -268,9 +262,7 @@ InstructionTLBMiss: addir10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi #endif @@ -316,11 +308,9 @@ DataStoreTLBMiss: * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ -#ifdef CONFIG_SWAP rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior -- 2.25.0
[PATCH 2/4] powerpc/8xx: Fix TLB Miss exceptions with CONFIG_SWAP
When CONFIG_SWAP is set, _PAGE_ACCESSED gets ANDed with _PAGE_PRESENT. But during several changes of _PAGE_ACCESSED value, this operation which is based on bit rotation has been forgotten. As of today it doesn't work. Update the rotation to the correct number of bits. Fixes: 5f356497c384 ("powerpc/8xx: remove unused _PAGE_WRITETHRU") Fixes: e0a8e0d90a9f ("powerpc/8xx: Handle PAGE_USER via APG bits") Fixes: 5b2753fc3e8a ("powerpc/8xx: Implementation of PAGE_EXEC") Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 32d85387bdc5..83101c5888e3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -239,7 +239,7 @@ InstructionTLBMiss: mtspr SPRN_MI_TWC, r11 #endif #ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif @@ -317,7 +317,7 @@ DataStoreTLBMiss: * r10 = (r10 & ~PRESENT) | r11; */ #ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif -- 2.25.0
[PATCH 1/4] powerpc/8xx: Fix instruction TLB miss exception with perf enabled
When perf is enabled, r11 must also be restored when CONFIG_HUGETLBFS is selected. Fixes: a891c43b97d3 ("powerpc/8xx: Prepare handlers for _PAGE_HUGE for 512k pages.") Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9f359d3fba74..32d85387bdc5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -268,7 +268,7 @@ InstructionTLBMiss: addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 #endif rfi -- 2.25.0
[PATCH] powerpc/603: Always fault when _PAGE_ACCESSED is not set
The kernel expects pte_young() to work regardless of CONFIG_SWAP. Make sure a minor fault is taken to set _PAGE_ACCESSED when it is not already set, regardless of the selection of CONFIG_SWAP. Fixes: 84de6ab0e904 ("powerpc/603: don't handle PAGE_ACCESSED in TLB miss handlers.") Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_book3s_32.S | 12 1 file changed, 12 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 5eb9eedac920..2aa16d5368e1 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -457,11 +457,7 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) bgt-112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ @@ -523,11 +519,7 @@ DataLoadTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif bgt-112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addir2, r2, (swapper_pg_dir - PAGE_OFFSET)@l/* kernel page table */ @@ -603,11 +595,7 @@ DataStoreTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif bgt-112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addir2, r2, (swapper_pg_dir - PAGE_OFFSET)@l/* kernel page table */ -- 2.25.0
Re: [PATCH 1/2] mm/mprotect: Call arch_validate_prot under mmap_lock and with length
Hi Khalid, On Wed, Oct 07, 2020 at 02:14:09PM -0600, Khalid Aziz wrote: > On 10/7/20 1:39 AM, Jann Horn wrote: > > arch_validate_prot() is a hook that can validate whether a given set of > > protection flags is valid in an mprotect() operation. It is given the set > > of protection flags and the address being modified. > > > > However, the address being modified can currently not actually be used in > > a meaningful way because: > > > > 1. Only the address is given, but not the length, and the operation can > >span multiple VMAs. Therefore, the callee can't actually tell which > >virtual address range, or which VMAs, are being targeted. > > 2. The mmap_lock is not held, meaning that if the callee were to check > >the VMA at @addr, that VMA would be unrelated to the one the > >operation is performed on. > > > > Currently, custom arch_validate_prot() handlers are defined by > > arm64, powerpc and sparc. > > arm64 and powerpc don't care about the address range, they just check the > > flags against CPU support masks. > > sparc's arch_validate_prot() attempts to look at the VMA, but doesn't take > > the mmap_lock. > > > > Change the function signature to also take a length, and move the > > arch_validate_prot() call in mm/mprotect.c down into the locked region. [...] > As Chris pointed out, the call to arch_validate_prot() from do_mmap2() > is made without holding mmap_lock. Lock is not acquired until > vm_mmap_pgoff(). This variance is uncomfortable but I am more > uncomfortable forcing all implementations of validate_prot to require > mmap_lock be held when non-sparc implementations do not have such need > yet. Since do_mmap2() is in powerpc specific code, for now this patch > solves a current problem. I still think sparc should avoid walking the vmas in arch_validate_prot(). The core code already has the vmas, though not when calling arch_validate_prot(). That's one of the reasons I added arch_validate_flags() with the MTE patches. 
For sparc, this could be (untested, just copied the arch_validate_prot() code): static inline bool arch_validate_flags(unsigned long vm_flags) { if (!(vm_flags & VM_SPARC_ADI)) return true; if (!adi_capable()) return false; /* ADI can not be enabled on PFN mapped pages */ if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return false; /* * Mergeable pages can become unmergeable if ADI is enabled on * them even if they have identical data on them. This can be * because ADI enabled pages with identical data may still not * have identical ADI tags on them. Disallow ADI on mergeable * pages. */ if (vma->vm_flags & VM_MERGEABLE) return false; return true; } > That leaves open the question of should > generic mmap call arch_validate_prot and return EINVAL for invalid > combination of protection bits, but that is better addressed in a > separate patch. The above would cover mmap() as well. The current sparc_validate_prot() relies on finding the vma for the corresponding address. However, if you call this early in the mmap() path, there's no such vma. It is only created later in mmap_region() which no longer has the original PROT_* flags (all converted to VM_* flags). Calling arch_validate_flags() on mmap() has a small side-effect on the user ABI: if the CPU is not adi_capable(), PROT_ADI is currently ignored on mmap() but rejected by sparc_validate_prot(). Powerpc already does this already and I think it should be fine for arm64 (it needs checking though as we have another flag, PROT_BTI, hopefully dynamic loaders don't pass this flag unconditionally). However, as I said above, it doesn't solve the mmap() PROT_ADI checking for sparc since there's no vma yet. I'd strongly recommend the arch_validate_flags() approach and reverting the "start" parameter added to arch_validate_prot() if you go for the flags route. Thanks. -- Catalin