Currently, on LoongArch, a shuffle that takes each result element from
the same position of one of the two input vectors is implemented with the
[x]vshuf instruction, which requires an additional copy of the index
vector. In this case, the [x]vbitsel.v instruction can be used instead.
gcc/ChangeLog:
* config/loongarch/lasx.md (lasx_xvbitsel_<lasxfmt_f>): Remove.
* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vbitsel_v):
Adjust.
(CODE_FOR_lasx_xvbitsel_v): Ditto.
* config/loongarch/loongarch.cc (loongarch_is_bitsel_pattern):
New function.
(loongarch_expand_vec_perm_bitsel): New function.
(loongarch_expand_lsx_shuffle): Call loongarch_expand_vec_perm_bitsel.
(loongarch_expand_vec_perm_const): Likewise.
* config/loongarch/lsx.md (lsx_vbitsel_<lsxfmt>): Remove; replaced
by @simd_vbitsel<mode> in simd.md.
* config/loongarch/simd.md (@simd_vbitsel<mode>): New
define_insn template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vector/lasx/vec_perm-xvshuf.c: Move to...
* gcc.target/loongarch/vector/lasx/vec_perm-xvbitsel.c: ...here.
* gcc.target/loongarch/vector/lasx/vec_perm-vbitsel.c: New test.
---
gcc/config/loongarch/lasx.md | 12 ---
gcc/config/loongarch/loongarch-builtins.cc | 4 +-
gcc/config/loongarch/loongarch.cc | 81 +++++++++++++++++++
gcc/config/loongarch/lsx.md | 12 ---
gcc/config/loongarch/simd.md | 13 +++
.../loongarch/vector/lasx/vec_perm-vbitsel.c | 17 ++++
...{vec_perm-xvshuf.c => vec_perm-xvbitsel.c} | 4 +-
7 files changed, 115 insertions(+), 28 deletions(-)
create mode 100644
gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-vbitsel.c
rename gcc/testsuite/gcc.target/loongarch/vector/lasx/{vec_perm-xvshuf.c =>
vec_perm-xvbitsel.c} (77%)
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index c8749d1a338..85fbb273e48 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -1183,18 +1183,6 @@ (define_insn "lasx_xvbitrevi_<lasxfmt>"
[(set_attr "type" "simd_bit")
(set_attr "mode" "<MODE>")])
-(define_insn "lasx_xvbitsel_<lasxfmt_f>"
- [(set (match_operand:LASX 0 "register_operand" "=f")
- (ior:LASX (and:LASX (not:LASX
- (match_operand:LASX 3 "register_operand" "f"))
- (match_operand:LASX 1 "register_operand" "f"))
- (and:LASX (match_dup 3)
- (match_operand:LASX 2 "register_operand" "f"))))]
- "ISA_HAS_LASX"
- "xvbitsel.v\t%u0,%u1,%u2,%u3"
- [(set_attr "type" "simd_bitmov")
- (set_attr "mode" "<MODE>")])
-
(define_insn "lasx_xvbitseli_b"
[(set (match_operand:V32QI 0 "register_operand" "=f")
(ior:V32QI (and:V32QI (not:V32QI
diff --git a/gcc/config/loongarch/loongarch-builtins.cc
b/gcc/config/loongarch/loongarch-builtins.cc
index 6c914c07d29..2b6f99a8345 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -247,7 +247,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
#define CODE_FOR_lsx_vandi_b CODE_FOR_andv16qi3
#define CODE_FOR_lsx_bnz_v CODE_FOR_lsx_bnz_v_b
#define CODE_FOR_lsx_bz_v CODE_FOR_lsx_bz_v_b
-#define CODE_FOR_lsx_vbitsel_v CODE_FOR_lsx_vbitsel_b
+#define CODE_FOR_lsx_vbitsel_v CODE_FOR_simd_vbitselv16qi
#define CODE_FOR_lsx_vseqi_b CODE_FOR_lsx_vseq_b
#define CODE_FOR_lsx_vseqi_h CODE_FOR_lsx_vseq_h
#define CODE_FOR_lsx_vseqi_w CODE_FOR_lsx_vseq_w
@@ -568,7 +568,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
#define CODE_FOR_lasx_xvaddi_du CODE_FOR_addv4di3
#define CODE_FOR_lasx_xvand_v CODE_FOR_andv32qi3
#define CODE_FOR_lasx_xvandi_b CODE_FOR_andv32qi3
-#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_lasx_xvbitsel_b
+#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_simd_vbitselv32qi
#define CODE_FOR_lasx_xvseqi_b CODE_FOR_lasx_xvseq_b
#define CODE_FOR_lasx_xvseqi_h CODE_FOR_lasx_xvseq_h
#define CODE_FOR_lasx_xvseqi_w CODE_FOR_lasx_xvseq_w
diff --git a/gcc/config/loongarch/loongarch.cc
b/gcc/config/loongarch/loongarch.cc
index e7cb798df8b..76011eec1e5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -8934,9 +8934,31 @@ loongarch_is_imm_set_shuffle (struct expand_vec_perm_d
*d)
return true;
}
+/* Return true if d->perm[i] % d->nelt == i for all i ([x]vbitsel.v case).  */
+static bool
+loongarch_is_bitsel_pattern (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+
+ for (int i = 0; i < d->nelt; i++)
+ {
+ unsigned char buf = d->perm[i];
+ if ((buf % d->nelt) != i) /* Index does not refer to position i.  */
+ {
+ result = false;
+ break;
+ }
+ }
+
+ return result;
+}
+
static bool
loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *);
+static bool
+loongarch_expand_vec_perm_bitsel (struct expand_vec_perm_d *);
+
/* Try to match and expand all kinds of 128-bit const vector permutation
cases. */
@@ -8952,6 +8974,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
if (loongarch_expand_vec_perm_even_odd (d))
return true;
+ if (loongarch_expand_vec_perm_bitsel (d))
+ return true;
+
return loongarch_try_expand_lsx_vshuf_const (d);
}
@@ -9379,6 +9404,59 @@ loongarch_expand_vec_perm_1 (rtx operands[])
gen_lowpart (GET_MODE (operands[0]), target));
}
+/* Try to expand the permutation D with [x]vbitsel.v, which saves the index
+ copy insn that [x]vshuf needs in a loop.  Return true if D was handled.  */
+static bool
+loongarch_expand_vec_perm_bitsel (struct expand_vec_perm_d *d)
+{
+ gcc_assert (ISA_HAS_LSX || ISA_HAS_LASX);
+
+ if (!loongarch_is_bitsel_pattern (d))
+ return false;
+
+ if (d->testing_p)
+ return true; /* Pattern matches; emit nothing when only testing.  */
+
+ int i, val;
+ rtx tmp, tmp2, sel, op0, op1, target;
+ rtx rperm[MAX_VECT_LEN];
+
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ /* Build the select mask: -1 sets every bit of the element (take it
+ from op1), 0 clears every bit (take it from op0).  */
+ val = d->perm[i] >= d->nelt ? -1 : 0;
+ rperm[i] = GEN_INT (val);
+ }
+
+ tmp2 = gen_reg_rtx (d->vmode);
+ machine_mode vimode = mode_for_vector
+ (int_mode_for_size (GET_MODE_BITSIZE
+ (GET_MODE_INNER
+ (d->vmode)), 0).require (), d->nelt).require ();
+
+ sel = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (d->nelt, rperm));
+ if (GET_MODE_CLASS (d->vmode) == MODE_VECTOR_FLOAT)
+ {
+ /* The [x]vbitsel.v insn pattern requires all source operands and
+ the destination to have the same mode, so store the integer mask
+ through an integer-mode subreg of the float-mode register.  */
+ tmp = simplify_gen_subreg (vimode, tmp2, d->vmode, 0);
+ emit_move_insn (tmp, sel);
+ }
+ else
+ emit_move_insn (tmp2, sel);
+
+ target = d->target;
+ op0 = d->op0;
+ op1 = d->one_vector_p ? d->op0 : d->op1;
+
+ emit_insn (gen_simd_vbitsel (d->vmode, target, op0, op1,
+ tmp2)); /* target = (op0 & ~tmp2) | (op1 & tmp2).  */
+
+ return true;
+}
+
/* Following are the assist function for const vector permutation support. */
static bool
loongarch_is_quad_duplicate (struct expand_vec_perm_d *d)
@@ -9855,6 +9933,9 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d
*d)
return true;
}
+ if (loongarch_expand_vec_perm_bitsel (d))
+ return true;
+
if (loongarch_if_match_xvshuffle (d))
{
if (d->testing_p)
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 3b06d2e20cf..21f883752fe 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -925,18 +925,6 @@ (define_insn "lsx_vbitrevi_<lsxfmt>"
[(set_attr "type" "simd_bit")
(set_attr "mode" "<MODE>")])
-(define_insn "lsx_vbitsel_<lsxfmt>"
- [(set (match_operand:ILSX 0 "register_operand" "=f")
- (ior:ILSX (and:ILSX (not:ILSX
- (match_operand:ILSX 3 "register_operand" "f"))
- (match_operand:ILSX 1 "register_operand" "f"))
- (and:ILSX (match_dup 3)
- (match_operand:ILSX 2 "register_operand" "f"))))]
- "ISA_HAS_LSX"
- "vbitsel.v\t%w0,%w1,%w2,%w3"
- [(set_attr "type" "simd_bitmov")
- (set_attr "mode" "<MODE>")])
-
(define_insn "lsx_vbitseli_b"
[(set (match_operand:V16QI 0 "register_operand" "=f")
(ior:V16QI (and:V16QI (not:V16QI
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 0ad10683cb5..8541b772580 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -1176,6 +1176,19 @@ (define_expand "<simd_isa>_<x>vshuf_<simdfmt><_f>"
DONE;
})
+(define_insn "@simd_vbitsel<mode>" ; op0 = (~op3 & op1) | (op3 & op2)
+ [(set (match_operand:ALLVEC 0 "register_operand" "=f")
+ (ior:ALLVEC
+ (and:ALLVEC
+ (not:ALLVEC (match_operand:ALLVEC 3 "register_operand" "f"))
+ (match_operand:ALLVEC 1 "register_operand" "f"))
+ (and:ALLVEC (match_dup 3)
+ (match_operand:ALLVEC 2 "register_operand" "f"))))]
+ ""
+ "<x>vbitsel.v\t%<wu>0,%<wu>1,%<wu>2,%<wu>3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "<MODE>")])
+
; The LoongArch SX Instructions.
(include "lsx.md")
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-vbitsel.c
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-vbitsel.c
new file mode 100644
index 00000000000..7a5118273c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-vbitsel.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlsx" } */
+/* { dg-final { scan-assembler-not "vshuf.w" } } */
+/* { dg-final { scan-assembler-not "vori.b" } } */
+/* { dg-final { scan-assembler "vbitsel.v" } } */
+
+void
+foo (int a[], int b[], int c[])
+{
+ for (int i = 0; i < 100; i += 4) /* Per group: 0 and 3 add, 1 and 2 sub.  */
+ {
+ c[i + 0] = a[i + 0] + b[i + 0];
+ c[i + 1] = a[i + 1] - b[i + 1];
+ c[i + 2] = a[i + 2] - b[i + 2];
+ c[i + 3] = a[i + 3] + b[i + 3];
+ }
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-xvshuf.c
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-xvbitsel.c
similarity index 77%
rename from gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-xvshuf.c
rename to gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-xvbitsel.c
index 6b19c2c2fd8..b3808b550e5 100644
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-xvshuf.c
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vec_perm-xvbitsel.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mlasx" } */
-/* { dg-final { scan-assembler "xvshuf.w" } } */
+/* { dg-final { scan-assembler-not "xvshuf.w" } } */
/* { dg-final { scan-assembler-not "xvperm.w" } } */
-/* { dg-final { scan-assembler-not "xvbitsel.v" } } */
+/* { dg-final { scan-assembler "xvbitsel.v" } } */
void
foo (int a[], int b[], int c[])
--
2.38.1