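This patch implements the following LSX vector integer multiply-add, multiply-subtract and widening multiply-add instructions: - VMADD.{B/H/W/D}; - VMSUB.{B/H/W/D}; - VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; - VMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.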
Signed-off-by: Song Gao <gaos...@loongson.cn> --- target/loongarch/disas.c | 34 ++++ target/loongarch/helper.h | 34 ++++ target/loongarch/insn_trans/trans_lsx.c.inc | 34 ++++ target/loongarch/insns.decode | 34 ++++ target/loongarch/lsx_helper.c | 202 ++++++++++++++++++++ 5 files changed, 338 insertions(+) diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c index 8818e078c1..3c11c6d5d2 100644 --- a/target/loongarch/disas.c +++ b/target/loongarch/disas.c @@ -981,3 +981,37 @@ INSN_LSX(vmulwod_h_bu_b, vvv) INSN_LSX(vmulwod_w_hu_h, vvv) INSN_LSX(vmulwod_d_wu_w, vvv) INSN_LSX(vmulwod_q_du_d, vvv) + +INSN_LSX(vmadd_b, vvv) +INSN_LSX(vmadd_h, vvv) +INSN_LSX(vmadd_w, vvv) +INSN_LSX(vmadd_d, vvv) +INSN_LSX(vmsub_b, vvv) +INSN_LSX(vmsub_h, vvv) +INSN_LSX(vmsub_w, vvv) +INSN_LSX(vmsub_d, vvv) + +INSN_LSX(vmaddwev_h_b, vvv) +INSN_LSX(vmaddwev_w_h, vvv) +INSN_LSX(vmaddwev_d_w, vvv) +INSN_LSX(vmaddwev_q_d, vvv) +INSN_LSX(vmaddwod_h_b, vvv) +INSN_LSX(vmaddwod_w_h, vvv) +INSN_LSX(vmaddwod_d_w, vvv) +INSN_LSX(vmaddwod_q_d, vvv) +INSN_LSX(vmaddwev_h_bu, vvv) +INSN_LSX(vmaddwev_w_hu, vvv) +INSN_LSX(vmaddwev_d_wu, vvv) +INSN_LSX(vmaddwev_q_du, vvv) +INSN_LSX(vmaddwod_h_bu, vvv) +INSN_LSX(vmaddwod_w_hu, vvv) +INSN_LSX(vmaddwod_d_wu, vvv) +INSN_LSX(vmaddwod_q_du, vvv) +INSN_LSX(vmaddwev_h_bu_b, vvv) +INSN_LSX(vmaddwev_w_hu_h, vvv) +INSN_LSX(vmaddwev_d_wu_w, vvv) +INSN_LSX(vmaddwev_q_du_d, vvv) +INSN_LSX(vmaddwod_h_bu_b, vvv) +INSN_LSX(vmaddwod_w_hu_h, vvv) +INSN_LSX(vmaddwod_d_wu_w, vvv) +INSN_LSX(vmaddwod_q_du_d, vvv) diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h index 568a89eec1..4d71b45fe0 100644 --- a/target/loongarch/helper.h +++ b/target/loongarch/helper.h @@ -336,3 +336,37 @@ DEF_HELPER_4(vmulwod_h_bu_b, void, env, i32, i32, i32) DEF_HELPER_4(vmulwod_w_hu_h, void, env, i32, i32, i32) DEF_HELPER_4(vmulwod_d_wu_w, void, env, i32, i32, i32) DEF_HELPER_4(vmulwod_q_du_d, void, env, i32, i32, i32) + +DEF_HELPER_4(vmadd_b, void, env, i32, i32, i32) 
+DEF_HELPER_4(vmadd_h, void, env, i32, i32, i32) +DEF_HELPER_4(vmadd_w, void, env, i32, i32, i32) +DEF_HELPER_4(vmadd_d, void, env, i32, i32, i32) +DEF_HELPER_4(vmsub_b, void, env, i32, i32, i32) +DEF_HELPER_4(vmsub_h, void, env, i32, i32, i32) +DEF_HELPER_4(vmsub_w, void, env, i32, i32, i32) +DEF_HELPER_4(vmsub_d, void, env, i32, i32, i32) + +DEF_HELPER_4(vmaddwev_h_b, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_w_h, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_d_w, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_q_d, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_h_b, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_w_h, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_d_w, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_q_d, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_h_bu, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_w_hu, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_d_wu, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_q_du, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_h_bu, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_w_hu, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_d_wu, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_q_du, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_h_bu_b, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_w_hu_h, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_d_wu_w, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwev_q_du_d, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_h_bu_b, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_w_hu_h, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_d_wu_w, void, env, i32, i32, i32) +DEF_HELPER_4(vmaddwod_q_du_d, void, env, i32, i32, i32) diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc index 7d27f574ed..e9674af1bd 100644 --- a/target/loongarch/insn_trans/trans_lsx.c.inc +++ b/target/loongarch/insn_trans/trans_lsx.c.inc @@ -252,3 +252,37 @@ TRANS(vmulwod_h_bu_b, gen_vvv, gen_helper_vmulwod_h_bu_b) 
TRANS(vmulwod_w_hu_h, gen_vvv, gen_helper_vmulwod_w_hu_h) TRANS(vmulwod_d_wu_w, gen_vvv, gen_helper_vmulwod_d_wu_w) TRANS(vmulwod_q_du_d, gen_vvv, gen_helper_vmulwod_q_du_d) + +TRANS(vmadd_b, gen_vvv, gen_helper_vmadd_b) +TRANS(vmadd_h, gen_vvv, gen_helper_vmadd_h) +TRANS(vmadd_w, gen_vvv, gen_helper_vmadd_w) +TRANS(vmadd_d, gen_vvv, gen_helper_vmadd_d) +TRANS(vmsub_b, gen_vvv, gen_helper_vmsub_b) +TRANS(vmsub_h, gen_vvv, gen_helper_vmsub_h) +TRANS(vmsub_w, gen_vvv, gen_helper_vmsub_w) +TRANS(vmsub_d, gen_vvv, gen_helper_vmsub_d) + +TRANS(vmaddwev_h_b, gen_vvv, gen_helper_vmaddwev_h_b) +TRANS(vmaddwev_w_h, gen_vvv, gen_helper_vmaddwev_w_h) +TRANS(vmaddwev_d_w, gen_vvv, gen_helper_vmaddwev_d_w) +TRANS(vmaddwev_q_d, gen_vvv, gen_helper_vmaddwev_q_d) +TRANS(vmaddwod_h_b, gen_vvv, gen_helper_vmaddwod_h_b) +TRANS(vmaddwod_w_h, gen_vvv, gen_helper_vmaddwod_w_h) +TRANS(vmaddwod_d_w, gen_vvv, gen_helper_vmaddwod_d_w) +TRANS(vmaddwod_q_d, gen_vvv, gen_helper_vmaddwod_q_d) +TRANS(vmaddwev_h_bu, gen_vvv, gen_helper_vmaddwev_h_bu) +TRANS(vmaddwev_w_hu, gen_vvv, gen_helper_vmaddwev_w_hu) +TRANS(vmaddwev_d_wu, gen_vvv, gen_helper_vmaddwev_d_wu) +TRANS(vmaddwev_q_du, gen_vvv, gen_helper_vmaddwev_q_du) +TRANS(vmaddwod_h_bu, gen_vvv, gen_helper_vmaddwod_h_bu) +TRANS(vmaddwod_w_hu, gen_vvv, gen_helper_vmaddwod_w_hu) +TRANS(vmaddwod_d_wu, gen_vvv, gen_helper_vmaddwod_d_wu) +TRANS(vmaddwod_q_du, gen_vvv, gen_helper_vmaddwod_q_du) +TRANS(vmaddwev_h_bu_b, gen_vvv, gen_helper_vmaddwev_h_bu_b) +TRANS(vmaddwev_w_hu_h, gen_vvv, gen_helper_vmaddwev_w_hu_h) +TRANS(vmaddwev_d_wu_w, gen_vvv, gen_helper_vmaddwev_d_wu_w) +TRANS(vmaddwev_q_du_d, gen_vvv, gen_helper_vmaddwev_q_du_d) +TRANS(vmaddwod_h_bu_b, gen_vvv, gen_helper_vmaddwod_h_bu_b) +TRANS(vmaddwod_w_hu_h, gen_vvv, gen_helper_vmaddwod_w_hu_h) +TRANS(vmaddwod_d_wu_w, gen_vvv, gen_helper_vmaddwod_d_wu_w) +TRANS(vmaddwod_q_du_d, gen_vvv, gen_helper_vmaddwod_q_du_d) diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode 
index 6f32fd290e..73390a07ce 100644 --- a/target/loongarch/insns.decode +++ b/target/loongarch/insns.decode @@ -705,3 +705,37 @@ vmulwod_h_bu_b 0111 00001010 00100 ..... ..... ..... @vvv vmulwod_w_hu_h 0111 00001010 00101 ..... ..... ..... @vvv vmulwod_d_wu_w 0111 00001010 00110 ..... ..... ..... @vvv vmulwod_q_du_d 0111 00001010 00111 ..... ..... ..... @vvv + +vmadd_b 0111 00001010 10000 ..... ..... ..... @vvv +vmadd_h 0111 00001010 10001 ..... ..... ..... @vvv +vmadd_w 0111 00001010 10010 ..... ..... ..... @vvv +vmadd_d 0111 00001010 10011 ..... ..... ..... @vvv +vmsub_b 0111 00001010 10100 ..... ..... ..... @vvv +vmsub_h 0111 00001010 10101 ..... ..... ..... @vvv +vmsub_w 0111 00001010 10110 ..... ..... ..... @vvv +vmsub_d 0111 00001010 10111 ..... ..... ..... @vvv + +vmaddwev_h_b 0111 00001010 11000 ..... ..... ..... @vvv +vmaddwev_w_h 0111 00001010 11001 ..... ..... ..... @vvv +vmaddwev_d_w 0111 00001010 11010 ..... ..... ..... @vvv +vmaddwev_q_d 0111 00001010 11011 ..... ..... ..... @vvv +vmaddwod_h_b 0111 00001010 11100 ..... ..... ..... @vvv +vmaddwod_w_h 0111 00001010 11101 ..... ..... ..... @vvv +vmaddwod_d_w 0111 00001010 11110 ..... ..... ..... @vvv +vmaddwod_q_d 0111 00001010 11111 ..... ..... ..... @vvv +vmaddwev_h_bu 0111 00001011 01000 ..... ..... ..... @vvv +vmaddwev_w_hu 0111 00001011 01001 ..... ..... ..... @vvv +vmaddwev_d_wu 0111 00001011 01010 ..... ..... ..... @vvv +vmaddwev_q_du 0111 00001011 01011 ..... ..... ..... @vvv +vmaddwod_h_bu 0111 00001011 01100 ..... ..... ..... @vvv +vmaddwod_w_hu 0111 00001011 01101 ..... ..... ..... @vvv +vmaddwod_d_wu 0111 00001011 01110 ..... ..... ..... @vvv +vmaddwod_q_du 0111 00001011 01111 ..... ..... ..... @vvv +vmaddwev_h_bu_b 0111 00001011 11000 ..... ..... ..... @vvv +vmaddwev_w_hu_h 0111 00001011 11001 ..... ..... ..... @vvv +vmaddwev_d_wu_w 0111 00001011 11010 ..... ..... ..... @vvv +vmaddwev_q_du_d 0111 00001011 11011 ..... ..... ..... @vvv +vmaddwod_h_bu_b 0111 00001011 11100 ..... ..... ..... 
@vvv +vmaddwod_w_hu_h 0111 00001011 11101 ..... ..... ..... @vvv +vmaddwod_d_wu_w 0111 00001011 11110 ..... ..... ..... @vvv +vmaddwod_q_du_d 0111 00001011 11111 ..... ..... ..... @vvv diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c index d55d2350dc..aea2e34292 100644 --- a/target/loongarch/lsx_helper.c +++ b/target/loongarch/lsx_helper.c @@ -1373,3 +1373,205 @@ DO_HELPER_VVV(vmulwod_h_bu_b, 16, helper_vvv, do_vmulwod_u_s) DO_HELPER_VVV(vmulwod_w_hu_h, 32, helper_vvv, do_vmulwod_u_s) DO_HELPER_VVV(vmulwod_d_wu_w, 64, helper_vvv, do_vmulwod_u_s) DO_HELPER_VVV(vmulwod_q_du_d, 128, helper_vvv, do_vmulwod_u_s) + +static void do_vmadd(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 8: + Vd->B[n] += Vj->B[n] * Vk->B[n]; + break; + case 16: + Vd->H[n] += Vj->H[n] * Vk->H[n]; + break; + case 32: + Vd->W[n] += Vj->W[n] * Vk->W[n]; + break; + case 64: + Vd->D[n] += Vj->D[n] * Vk->D[n]; + break; + default: + g_assert_not_reached(); + } +} + +static void do_vmsub(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 8: + Vd->B[n] -= Vj->B[n] * Vk->B[n]; + break; + case 16: + Vd->H[n] -= Vj->H[n] * Vk->H[n]; + break; + case 32: + Vd->W[n] -= Vj->W[n] * Vk->W[n]; + break; + case 64: + Vd->D[n] -= Vj->D[n] * Vk->D[n]; + break; + default: + g_assert_not_reached(); + } +} + +DO_HELPER_VVV(vmadd_b, 8, helper_vvv, do_vmadd) +DO_HELPER_VVV(vmadd_h, 16, helper_vvv, do_vmadd) +DO_HELPER_VVV(vmadd_w, 32, helper_vvv, do_vmadd) +DO_HELPER_VVV(vmadd_d, 64, helper_vvv, do_vmadd) +DO_HELPER_VVV(vmsub_b, 8, helper_vvv, do_vmsub) +DO_HELPER_VVV(vmsub_h, 16, helper_vvv, do_vmsub) +DO_HELPER_VVV(vmsub_w, 32, helper_vvv, do_vmsub) +DO_HELPER_VVV(vmsub_d, 64, helper_vvv, do_vmsub) + +static void do_vmaddwev_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 16: + Vd->H[n] += Vj->B[2 * n] * Vk->B[2 * n]; + break; + case 32: + Vd->W[n] += Vj->H[2 * n] * Vk->H[2 * n]; + break; + case 64: + Vd->D[n] 
+= (int64_t)Vj->W[2 * n] * (int64_t)Vk->W[2 * n]; + break; + case 128: + Vd->Q[n] += (__int128_t)Vj->D[2 * n] * (__int128_t)Vk->D[2 * n]; + break; + default: + g_assert_not_reached(); + } +} + +static void do_vmaddwod_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 16: + Vd->H[n] += Vj->B[2 * n + 1] * Vk->B[2 * n + 1]; + break; + case 32: + Vd->W[n] += Vj->H[2 * n + 1] * Vk->H[2 * n + 1]; + break; + case 64: + Vd->D[n] += (int64_t)Vj->W[2 * n + 1] * (int64_t)Vk->W[2 * n + 1]; + break; + case 128: + Vd->Q[n] += (__int128_t)((__int128_t)Vj->D[2 * n + 1] * + (__int128_t)Vk->D[2 * n + 1]); + break; + default: + g_assert_not_reached(); + } +} + +static void do_vmaddwev_u(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 16: + Vd->H[n] += (uint8_t)Vj->B[2 * n] * (uint8_t)Vk->B[2 * n]; + break; + case 32: + Vd->W[n] += (uint16_t)Vj->H[2 * n] * (uint16_t)Vk->H[2 * n]; + break; + case 64: + Vd->D[n] += (uint64_t)(uint32_t)Vj->W[2 * n] * + (uint64_t)(uint32_t)Vk->W[2 * n]; + break; + case 128: + Vd->Q[n] += (__uint128_t)(uint64_t)Vj->D[2 * n] * + (__uint128_t)(uint64_t)Vk->D[2 * n]; + break; + default: + g_assert_not_reached(); + } +} + +static void do_vmaddwod_u(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 16: + Vd->H[n] += (uint8_t)Vj->B[2 * n + 1] * (uint8_t)Vk->B[2 * n + 1]; + break; + case 32: + Vd->W[n] += (uint16_t)Vj->H[2 * n + 1] * (uint16_t)Vk->H[2 * n + 1]; + break; + case 64: + Vd->D[n] += (uint64_t)(uint32_t)Vj->W[2 * n + 1] * + (uint64_t)(uint32_t)Vk->W[2 * n + 1]; + break; + case 128: + Vd->Q[n] += (__uint128_t)(uint64_t)Vj->D[2 * n + 1] * + (__uint128_t)(uint64_t)Vk->D[2 * n + 1]; + break; + default: + g_assert_not_reached(); + } +} + +static void do_vmaddwev_u_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 16: + Vd->H[n] += (uint8_t)Vj->B[2 * n] * Vk->B[2 * n]; + break; + case 32: + Vd->W[n] += (uint16_t)Vj->H[2 * n] * Vk->H[2 * n]; + break; 
+ case 64: + Vd->D[n] += (int64_t)(uint32_t)Vj->W[2 * n] * (int64_t)Vk->W[2 * n]; + break; + case 128: + Vd->Q[n] += (__int128_t)(uint64_t)Vj->D[2 * n] * + (__int128_t)Vk->D[2 * n]; + break; + default: + g_assert_not_reached(); + } +} + +static void do_vmaddwod_u_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) +{ + switch (bit) { + case 16: + Vd->H[n] += (uint8_t)Vj->B[2 * n + 1] * Vk->B[2 * n + 1]; + break; + case 32: + Vd->W[n] += (uint16_t)Vj->H[2 * n + 1] * Vk->H[2 * n + 1]; + break; + case 64: + Vd->D[n] += (int64_t)(uint32_t)Vj->W[2 * n + 1] * + (int64_t)Vk->W[2 * n + 1]; + break; + case 128: + Vd->Q[n] += (__int128_t)(uint64_t)Vj->D[2 * n + 1] * + (__int128_t)Vk->D[2 * n + 1]; + break; + default: + g_assert_not_reached(); + } +} + +DO_HELPER_VVV(vmaddwev_h_b, 16, helper_vvv, do_vmaddwev_s) +DO_HELPER_VVV(vmaddwev_w_h, 32, helper_vvv, do_vmaddwev_s) +DO_HELPER_VVV(vmaddwev_d_w, 64, helper_vvv, do_vmaddwev_s) +DO_HELPER_VVV(vmaddwev_q_d, 128, helper_vvv, do_vmaddwev_s) +DO_HELPER_VVV(vmaddwod_h_b, 16, helper_vvv, do_vmaddwod_s) +DO_HELPER_VVV(vmaddwod_w_h, 32, helper_vvv, do_vmaddwod_s) +DO_HELPER_VVV(vmaddwod_d_w, 64, helper_vvv, do_vmaddwod_s) +DO_HELPER_VVV(vmaddwod_q_d, 128, helper_vvv, do_vmaddwod_s) +DO_HELPER_VVV(vmaddwev_h_bu, 16, helper_vvv, do_vmaddwev_u) +DO_HELPER_VVV(vmaddwev_w_hu, 32, helper_vvv, do_vmaddwev_u) +DO_HELPER_VVV(vmaddwev_d_wu, 64, helper_vvv, do_vmaddwev_u) +DO_HELPER_VVV(vmaddwev_q_du, 128, helper_vvv, do_vmaddwev_u) +DO_HELPER_VVV(vmaddwod_h_bu, 16, helper_vvv, do_vmaddwod_u) +DO_HELPER_VVV(vmaddwod_w_hu, 32, helper_vvv, do_vmaddwod_u) +DO_HELPER_VVV(vmaddwod_d_wu, 64, helper_vvv, do_vmaddwod_u) +DO_HELPER_VVV(vmaddwod_q_du, 128, helper_vvv, do_vmaddwod_u) +DO_HELPER_VVV(vmaddwev_h_bu_b, 16, helper_vvv, do_vmaddwev_u_s) +DO_HELPER_VVV(vmaddwev_w_hu_h, 32, helper_vvv, do_vmaddwev_u_s) +DO_HELPER_VVV(vmaddwev_d_wu_w, 64, helper_vvv, do_vmaddwev_u_s) +DO_HELPER_VVV(vmaddwev_q_du_d, 128, helper_vvv, do_vmaddwev_u_s) 
+DO_HELPER_VVV(vmaddwod_h_bu_b, 16, helper_vvv, do_vmaddwod_u_s) +DO_HELPER_VVV(vmaddwod_w_hu_h, 32, helper_vvv, do_vmaddwod_u_s) +DO_HELPER_VVV(vmaddwod_d_wu_w, 64, helper_vvv, do_vmaddwod_u_s) +DO_HELPER_VVV(vmaddwod_q_du_d, 128, helper_vvv, do_vmaddwod_u_s) -- 2.31.1