Re: [PATCH v3 2/4] powerpc/sstep: support emulation for vsx vector paired storage access instructions

2020-08-05 Thread Naveen N. Rao

Balamuruhan S wrote:

Add emulate_step() changes to support VSX vector paired storage
access instructions, which provide octword operand loads/stores
between storage and the set of 64 Vector Scalar Registers (VSRs).


This should be squashed in with the previous patch. Otherwise, emulation 
of these instructions won't be complete, which affects bisectability.


- Naveen



[PATCH v3 2/4] powerpc/sstep: support emulation for vsx vector paired storage access instructions

2020-07-31 Thread Balamuruhan S
Add emulate_step() changes to support VSX vector paired storage
access instructions, which provide octword operand loads/stores
between storage and the set of 64 Vector Scalar Registers (VSRs).

Suggested-by: Ravi Bangoria 
Suggested-by: Naveen N. Rao 
Signed-off-by: Balamuruhan S 
---
 arch/powerpc/lib/sstep.c | 77 +++-
 1 file changed, 60 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 22147257d74d..01e1a3adc406 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -280,6 +280,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int 
nb)
up[1] = tmp;
break;
}
+   case 32: {
+   unsigned long *up = (unsigned long *)ptr;
+   unsigned long tmp;
+
+   tmp = byterev_8(up[0]);
+   up[0] = byterev_8(up[3]);
+   up[3] = tmp;
+   tmp = byterev_8(up[2]);
+   up[2] = byterev_8(up[1]);
+   up[1] = tmp;
+   break;
+   }
+
 #endif
default:
WARN_ON_ONCE(1);
@@ -710,6 +723,8 @@ void emulate_vsx_load(struct instruction_op *op, union 
vsx_reg *reg,
reg->d[0] = reg->d[1] = 0;
 
switch (op->element_size) {
+   case 32:
+   /* [p]lxvp[x] */
case 16:
/* whole vector; lxv[x] or lxvl[l] */
if (size == 0)
@@ -718,7 +733,7 @@ void emulate_vsx_load(struct instruction_op *op, union 
vsx_reg *reg,
if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
rev = !rev;
if (rev)
-   do_byte_reverse(reg, 16);
+   do_byte_reverse(reg, size);
break;
case 8:
/* scalar loads, lxvd2x, lxvdsx */
@@ -794,6 +809,20 @@ void emulate_vsx_store(struct instruction_op *op, const 
union vsx_reg *reg,
size = GETSIZE(op->type);
 
switch (op->element_size) {
+   case 32:
+   /* [p]stxvp[x] */
+   if (size == 0)
+   break;
+   if (rev) {
+   /* reverse 32 bytes */
+   buf.d[0] = byterev_8(reg->d[3]);
+   buf.d[1] = byterev_8(reg->d[2]);
+   buf.d[2] = byterev_8(reg->d[1]);
+   buf.d[3] = byterev_8(reg->d[0]);
+   reg = &buf;
+   }
+   memcpy(mem, reg, size);
+   break;
case 16:
/* stxv, stxvx, stxvl, stxvll */
if (size == 0)
@@ -862,28 +891,35 @@ static nokprobe_inline int do_vsx_load(struct 
instruction_op *op,
   bool cross_endian)
 {
int reg = op->reg;
-   u8 mem[16];
-   union vsx_reg buf;
+   int i, nr_vsx_regs;
+   u8 mem[32];
+   union vsx_reg buf[2];
int size = GETSIZE(op->type);
 
if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
return -EFAULT;
 
-   emulate_vsx_load(op, &buf, mem, cross_endian);
+   nr_vsx_regs = size / sizeof(__vector128);
+   emulate_vsx_load(op, buf, mem, cross_endian);
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
if (regs->msr & MSR_FP) {
-   load_vsrn(reg, &buf);
+   for (i = 0; i < nr_vsx_regs; i++)
+   load_vsrn(reg + i, &buf[i].v);
} else {
-   current->thread.fp_state.fpr[reg][0] = buf.d[0];
-   current->thread.fp_state.fpr[reg][1] = buf.d[1];
+   for (i = 0; i < nr_vsx_regs; i++) {
+   current->thread.fp_state.fpr[reg + i][0] = 
buf[i].d[0];
+   current->thread.fp_state.fpr[reg + i][1] = 
buf[i].d[1];
+   }
}
} else {
if (regs->msr & MSR_VEC)
-   load_vsrn(reg, &buf);
+   for (i = 0; i < nr_vsx_regs; i++)
+   load_vsrn(reg + i, &buf[i].v);
else
-   current->thread.vr_state.vr[reg - 32] = buf.v;
+   for (i = 0; i < nr_vsx_regs; i++)
+   current->thread.vr_state.vr[reg - 32 + i] = 
buf[i].v;
}
preempt_enable();
return 0;
@@ -894,30 +930,37 @@ static nokprobe_inline int do_vsx_store(struct 
instruction_op *op,
bool cross_endian)
 {
int reg = op->reg;
-   u8 mem[16];
-   union vsx_reg buf;
+   int i, nr_vsx_regs;
+   u8 mem[32];
+   union vsx_reg buf[2];
int size = GETSIZE(op->type);
 
if (!address_ok(regs, ea, size))
return -EFAULT;
 
+   nr_vsx_regs = size / sizeof(__vector128);