On 3/27/23 20:06, Song Gao wrote:
+void HELPER(vshuf_b)(CPULoongArchState *env, + uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) +{ + int i, m, k; + VReg temp; + VReg *Vd = &(env->fpr[vd].vreg); + VReg *Vj = &(env->fpr[vj].vreg); + VReg *Vk = &(env->fpr[vk].vreg); + VReg *Va = &(env->fpr[va].vreg); + + m = LSX_LEN/8; + for (i = 0; i < m ; i++) { + k = (Va->B(i)& 0x3f) % (2 * m);
Eh? Double masking? The index is reduced by `& 0x3f` and then reduced again by `% (2 * m)`.
+ temp.B(i) = (Va->B(i) & 0xc0) ? 0 : k < m ? Vk->B(k) : Vj->B(k - m);
Triple masking? (The `& 0xc0` test comes on top of the `& 0x3f` and `% (2 * m)` already applied to k.) I would have expected something like

    k = Va->B(i) % N;
    temp.B(i) = (k < m ? Vj : k < 2 * m ? Vk : 0);
+#define VSHUF(NAME, BIT, E) \ +void HELPER(NAME)(CPULoongArchState *env, \ + uint32_t vd, uint32_t vj, uint32_t vk) \ +{ \ + int i, m, k; \ + VReg temp; \ + VReg *Vd = &(env->fpr[vd].vreg); \ + VReg *Vj = &(env->fpr[vj].vreg); \ + VReg *Vk = &(env->fpr[vk].vreg); \ + \ + m = LSX_LEN/BIT; \ + for (i = 0; i < m; i++) { \ + k = (Vd->E(i) & 0x3f) % (2 * m); \ + temp.E(i) = (Vd->E(i) & 0xc0) ? 0 : k < m ? Vk->E(k) : Vj->E(k - m); \ + } \ + Vd->D(0) = temp.D(0); \ + Vd->D(1) = temp.D(1); \ +}
Likewise.
+#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03)) + +#define VSHUF4I(NAME, BIT, E) \ +void HELPER(NAME)(CPULoongArchState *env, \ + uint32_t vd, uint32_t vj, uint32_t imm) \ +{ \ + int i; \ + VReg temp; \ + VReg *Vd = &(env->fpr[vd].vreg); \ + VReg *Vj = &(env->fpr[vj].vreg); \ + \ + for (i = 0; i < LSX_LEN/BIT; i++) { \ + temp.E(i) = Vj->E(SHF_POS(i, imm)); \ + } \ + Vd->D[0] = temp.D[0]; \ + Vd->D[1] = temp.D[1]; \ +}
Merge SHF_POS into VSHUF4I, unless you expect it to be used again?
+void HELPER(vshuf4i_d)(CPULoongArchState *env, + uint32_t vd, uint32_t vj, uint32_t imm) +{ + VReg *Vd = &(env->fpr[vd].vreg); + VReg *Vj = &(env->fpr[vj].vreg); + + VReg temp; + temp.D(0) = ((imm & 0x03) == 0x00) ? Vd->D(0): + ((imm & 0x03) == 0x01) ? Vd->D(1): + ((imm & 0x03) == 0x02) ? Vj->D(0): Vj->D(1); + + temp.D(1) = ((imm & 0x0c) == 0x00) ? Vd->D(0): + ((imm & 0x0c) == 0x04) ? Vd->D(1): + ((imm & 0x0c) == 0x08) ? Vj->D(0): Vj->D(1); + + Vd->D[0] = temp.D[0]; + Vd->D[1] = temp.D[1]; +}
Perhaps

    temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
    temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1);

r~