From: Balamuruhan S <bal...@linux.ibm.com>

VSX Vector Paired instructions load/store an octword (32 bytes)
from/to storage into/from two sequential VSRs. Add emulation support
for these new instructions:
  * Load VSX Vector Paired (lxvp)
  * Load VSX Vector Paired Indexed (lxvpx)
  * Prefixed Load VSX Vector Paired (plxvp)
  * Store VSX Vector Paired (stxvp)
  * Store VSX Vector Paired Indexed (stxvpx)
  * Prefixed Store VSX Vector Paired (pstxvp)

Suggested-by: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com>
Signed-off-by: Balamuruhan S <bal...@linux.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.ibm.com>
[kernel test robot reported a build failure]
Reported-by: kernel test robot <l...@intel.com>
---
 arch/powerpc/lib/sstep.c | 150 +++++++++++++++++++++++++++++++++------
 1 file changed, 129 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index faf0bbf3efb7..96ca813a65e7 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -32,6 +32,10 @@ extern char system_call_vectored_emulate[];
 #define XER_OV32       0x00080000U
 #define XER_CA32       0x00040000U
 
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd)   ((((rd) & 1) << 5) | ((rd) & 0xfe))
+#endif
+
 #ifdef CONFIG_PPC_FPU
 /*
  * Functions in ldstfp.S
@@ -279,6 +283,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
                up[1] = tmp;
                break;
        }
+       case 32: {
+               unsigned long *up = (unsigned long *)ptr;
+               unsigned long tmp;
+
+               tmp = byterev_8(up[0]);
+               up[0] = byterev_8(up[3]);
+               up[3] = tmp;
+               tmp = byterev_8(up[2]);
+               up[2] = byterev_8(up[1]);
+               up[1] = tmp;
+               break;
+       }
+
 #endif
        default:
                WARN_ON_ONCE(1);
@@ -709,6 +726,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
        reg->d[0] = reg->d[1] = 0;
 
        switch (op->element_size) {
+       case 32:
+               /* [p]lxvp[x] */
        case 16:
                /* whole vector; lxv[x] or lxvl[l] */
                if (size == 0)
@@ -717,7 +736,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
                if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
                        rev = !rev;
                if (rev)
-                       do_byte_reverse(reg, 16);
+                       do_byte_reverse(reg, size);
                break;
        case 8:
                /* scalar loads, lxvd2x, lxvdsx */
@@ -793,6 +812,20 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
        size = GETSIZE(op->type);
 
        switch (op->element_size) {
+       case 32:
+               /* [p]stxvp[x] */
+               if (size == 0)
+                       break;
+               if (rev) {
+                       /* reverse 32 bytes */
+                       buf.d[0] = byterev_8(reg->d[3]);
+                       buf.d[1] = byterev_8(reg->d[2]);
+                       buf.d[2] = byterev_8(reg->d[1]);
+                       buf.d[3] = byterev_8(reg->d[0]);
+                       reg = &buf;
+               }
+               memcpy(mem, reg, size);
+               break;
        case 16:
                /* stxv, stxvx, stxvl, stxvll */
                if (size == 0)
@@ -861,28 +894,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
                                       bool cross_endian)
 {
        int reg = op->reg;
-       u8 mem[16];
-       union vsx_reg buf;
+       int i, j, nr_vsx_regs;
+       u8 mem[32];
+       union vsx_reg buf[2];
        int size = GETSIZE(op->type);
 
        if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
                return -EFAULT;
 
-       emulate_vsx_load(op, &buf, mem, cross_endian);
+       nr_vsx_regs = size / sizeof(__vector128);
+       emulate_vsx_load(op, buf, mem, cross_endian);
        preempt_disable();
        if (reg < 32) {
                /* FP regs + extensions */
                if (regs->msr & MSR_FP) {
-                       load_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               load_vsrn(reg + i, &buf[j].v);
+                       }
                } else {
-                       current->thread.fp_state.fpr[reg][0] = buf.d[0];
-                       current->thread.fp_state.fpr[reg][1] = buf.d[1];
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               current->thread.fp_state.fpr[reg + i][0] = 
buf[j].d[0];
+                               current->thread.fp_state.fpr[reg + i][1] = 
buf[j].d[1];
+                       }
                }
        } else {
-               if (regs->msr & MSR_VEC)
-                       load_vsrn(reg, &buf);
-               else
-                       current->thread.vr_state.vr[reg - 32] = buf.v;
+               if (regs->msr & MSR_VEC) {
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               load_vsrn(reg + i, &buf[j].v);
+                       }
+               } else {
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               current->thread.vr_state.vr[reg - 32 + i] = 
buf[j].v;
+                       }
+               }
        }
        preempt_enable();
        return 0;
@@ -893,30 +941,45 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
                                        bool cross_endian)
 {
        int reg = op->reg;
-       u8 mem[16];
-       union vsx_reg buf;
+       int i, j, nr_vsx_regs;
+       u8 mem[32];
+       union vsx_reg buf[2];
        int size = GETSIZE(op->type);
 
        if (!address_ok(regs, ea, size))
                return -EFAULT;
 
+       nr_vsx_regs = size / sizeof(__vector128);
        preempt_disable();
        if (reg < 32) {
                /* FP regs + extensions */
                if (regs->msr & MSR_FP) {
-                       store_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               store_vsrn(reg + i, &buf[j].v);
+                       }
                } else {
-                       buf.d[0] = current->thread.fp_state.fpr[reg][0];
-                       buf.d[1] = current->thread.fp_state.fpr[reg][1];
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               buf[j].d[0] = current->thread.fp_state.fpr[reg 
+ i][0];
+                               buf[j].d[1] = current->thread.fp_state.fpr[reg 
+ i][1];
+                       }
                }
        } else {
-               if (regs->msr & MSR_VEC)
-                       store_vsrn(reg, &buf);
-               else
-                       buf.v = current->thread.vr_state.vr[reg - 32];
+               if (regs->msr & MSR_VEC) {
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               store_vsrn(reg + i, &buf[j].v);
+                       }
+               } else {
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               j = IS_LE ? nr_vsx_regs - i - 1 : i;
+                               buf[j].v = current->thread.vr_state.vr[reg - 32 
+ i];
+                       }
+               }
        }
        preempt_enable();
-       emulate_vsx_store(op, &buf, mem, cross_endian);
+       emulate_vsx_store(op, buf, mem, cross_endian);
        return  copy_mem_out(mem, ea, size, regs);
 }
 #endif /* CONFIG_VSX */
@@ -2403,6 +2466,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                        op->vsx_flags = VSX_SPLAT;
                        break;
 
+               case 333:       /* lxvpx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_31))
+                               return -1;
+                       op->reg = VSX_REGISTER_XTP(rd);
+                       op->type = MKOP(LOAD_VSX, 0, 32);
+                       op->element_size = 32;
+                       break;
+
                case 364:       /* lxvwsx */
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2431,6 +2502,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                                VSX_CHECK_VEC;
                        break;
                }
+               case 461:       /* stxvpx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_31))
+                               return -1;
+                       op->reg = VSX_REGISTER_XTP(rd);
+                       op->type = MKOP(STORE_VSX, 0, 32);
+                       op->element_size = 32;
+                       break;
                case 524:       /* lxsspx */
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2672,6 +2750,22 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 #endif
 
 #ifdef CONFIG_VSX
+       case 6:
+               if (!cpu_has_feature(CPU_FTR_ARCH_31))
+                       return -1;
+               op->ea = dqform_ea(word, regs);
+               op->reg = VSX_REGISTER_XTP(rd);
+               op->element_size = 32;
+               switch (word & 0xf) {
+               case 0:         /* lxvp */
+                       op->type = MKOP(LOAD_VSX, 0, 32);
+                       break;
+               case 1:         /* stxvp */
+                       op->type = MKOP(STORE_VSX, 0, 32);
+                       break;
+               }
+               break;
+
        case 61:        /* stfdp, lxv, stxsd, stxssp, stxv */
                switch (word & 7) {
                case 0:         /* stfdp with LSB of DS field = 0 */
@@ -2805,12 +2899,26 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                        case 57:        /* pld */
                                op->type = MKOP(LOAD, PREFIXED, 8);
                                break;
+#ifdef CONFIG_VSX
+                       case 58:        /* plxvp */
+                               op->reg = VSX_REGISTER_XTP(rd);
+                               op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+                               op->element_size = 32;
+                               break;
+#endif /* CONFIG_VSX */
                        case 60:        /* pstq */
                                op->type = MKOP(STORE, PREFIXED, 16);
                                break;
                        case 61:        /* pstd */
                                op->type = MKOP(STORE, PREFIXED, 8);
                                break;
+#ifdef CONFIG_VSX
+                       case 62:        /* pstxvp */
+                               op->reg = VSX_REGISTER_XTP(rd);
+                               op->type = MKOP(STORE_VSX, PREFIXED, 32);
+                               op->element_size = 32;
+                               break;
+#endif /* CONFIG_VSX */
                        }
                        break;
                case 1: /* Type 01 Eight-Byte Register-to-Register */
-- 
2.26.2

Reply via email to