gcc/
* config/loongarch/loongarch.cc (loongarch_expand_vec_perm_1):
Use lasx_xvpackev_w (mask * 2, mask * 2 + 1) to "expand" the
V4DI selector to V8SI.
---
gcc/config/loongarch/loongarch.cc | 40 ++++++++++---------------------
1 file changed, 12 insertions(+), 28 deletions(-)
diff --git a/gcc/config/loongarch/loongarch.cc
b/gcc/config/loongarch/loongarch.cc
index e97cc3b5848..73ff7e4274e 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -9090,10 +9090,9 @@ loongarch_expand_vec_perm_1 (rtx operands[])
rtx t1 = NULL;
rtx t2 = NULL;
rtx t3, t4, t5, t6, vt = NULL;
- rtx vec[32] = {NULL};
machine_mode mode = GET_MODE (op0);
machine_mode maskmode = GET_MODE (mask);
- int w, i;
+ int w;
/* Number of elements in the vector. */
w = GET_MODE_NUNITS (mode);
@@ -9111,36 +9110,21 @@ loongarch_expand_vec_perm_1 (rtx operands[])
{
maskmode = mode = V8SImode;
w = 8;
- t1 = gen_reg_rtx (maskmode);
/* Replicate the low bits of the V4DImode mask into V8SImode:
- mask = { A B C D }
- t1 = { A A B B C C D D }. */
- for (i = 0; i < w / 2; ++i)
- vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
- vt = force_reg (maskmode, vt);
- mask = gen_lowpart (maskmode, mask);
- emit_insn (gen_lasx_xvperm_w (t1, mask, vt));
-
- /* Multiply the shuffle indicies by two. */
- t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
- OPTAB_DIRECT);
-
- /* Add one to the odd shuffle indicies:
- t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
- for (i = 0; i < w / 2; ++i)
- {
- vec[i * 2] = const0_rtx;
- vec[i * 2 + 1] = const1_rtx;
- }
- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
- vt = validize_mem (force_const_mem (maskmode, vt));
- t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
- OPTAB_DIRECT);
+ mask = lasx_xvpackev_w (mask * 2, mask * 2 + 1) */
+ t1 = expand_binop (V4DImode, add_optab, mask, mask, NULL_RTX,
+ false, OPTAB_DIRECT);
+ t2 = gen_const_vec_duplicate (V4DImode, CONST1_RTX (DImode));
+ t2 = expand_binop (V4DImode, add_optab, t1, t2, NULL_RTX,
+ true, OPTAB_DIRECT);
+ t1 = gen_lowpart (mode, t1);
+ t2 = gen_lowpart (mode, t2);
+ t3 = gen_reg_rtx (mode);
+ emit_insn (gen_lasx_xvpackev_w (t3, t1, t2));
/* Continue as if V8SImode (resp. V32QImode) was used initially. */
- operands[3] = mask = t1;
+ operands[3] = mask = t3;
target = gen_reg_rtx (mode);
op0 = gen_lowpart (mode, op0);
op1 = gen_lowpart (mode, op1);
--
2.51.2