[PATCH] LoongArch: Optimize statement to use bstrins.{w|d}

Deng Jianbo Thu, 13 Nov 2025 18:22:46 -0800

For an expression of the form (a << imm1) | (b & imm2), when imm2 is equal
to (1 << imm1) - 1, it can be optimized to use the bstrins.{w|d} instruction.


gcc/ChangeLog:

        * config/loongarch/loongarch.md
        (*bstrins_w_for_ior_ashift_and_extend): New template.
        (*bstrins_d_for_ior_ashift_and): New template.
        * config/loongarch/predicates.md (const_uimm63_operand): New
        predicate.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/bstrins-5.c: New test.
        * gcc.target/loongarch/bstrins-6.c: New test.

---
 gcc/config/loongarch/loongarch.md             | 59 +++++++++++++++++++
 gcc/config/loongarch/predicates.md            |  4 ++
 .../gcc.target/loongarch/bstrins-5.c          | 14 +++++
 .../gcc.target/loongarch/bstrins-6.c          | 14 +++++
 4 files changed, 91 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-6.c

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index e23c973c38b..763d514cac7 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1637,6 +1637,65 @@ (define_insn_and_split "*bstrins_<mode>_for_ior_mask"
     operands[3] = tmp;
   })
 
+;; Optimize (a << imm1) | (b & imm2) to use the bstrins.w instruction when both
+;; a and b are 32 bits wide and imm2 is equal to (1LL << imm1) - 1.
+;; For example, (a << 1) | (b & 1) would otherwise be compiled to:
+;;     slli.w  $r12,$r12,1
+;;     andi    $r13,$r13,1
+;;     or      $r12,$r12,$r13
+;; With this pattern it is compiled to use bstrins.w instead, as below:
+;;     bstrins.w   $r13,$r12,31,1
+(define_insn_and_split "*bstrins_w_for_ior_ashift_and_extend"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (any_or_plus:DI
+         (and:DI (match_operand:DI 1 "register_operand" "r")
+                 (match_operand:SI 2 "const_int_operand" "i"))
+         (ashift:DI
+           (sign_extract:DI
+             (match_operand:DI 3 "register_operand" "r")
+             (match_operand:SI 4 "const_uimm5_operand")
+             (const_int 0))
+           (match_operand:SI 5 "const_uimm5_operand"))))]
+  "TARGET_64BIT && loongarch_pre_reload_split ()
+   && !reg_overlap_mentioned_p (operands[0], operands[3])
+   && INTVAL (operands[2]) != 0 && INTVAL (operands[5]) != 0
+   && INTVAL (operands[2]) == (1LL << INTVAL (operands[5])) - 1
+   && INTVAL (operands[4]) + INTVAL (operands[5]) == 0x20"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    emit_move_insn (operands[0], operands[1]);
+    rtx len = GEN_INT (32 - INTVAL (operands[5]));
+    rtx dest = gen_lowpart (SImode, operands[0]);
+    rtx op = gen_lowpart (SImode, operands[3]);
+    emit_insn (gen_insvsi (dest, len, operands[5], op));
+  })
+
+;; Optimize (a << imm1) | (b & imm2) to use bstrins.d when a and b are 8-bit,
+;; 16-bit or 64-bit values and imm2 == (1 << imm1) - 1: copy b to the result,
+;; then insert a at bits [63:imm1].  The mask check is unsigned (imm1 <= 63).
+(define_insn_and_split "*bstrins_d_for_ior_ashift_and"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (any_or_plus:DI
+         (and:DI (match_operand:DI 1 "register_operand" "r")
+                 (match_operand:DI 2 "const_int_operand" "i"))
+         (ashift:DI
+             (match_operand:DI 3 "register_operand" "r")
+             (match_operand:DI 4 "const_uimm63_operand"))))]
+  "TARGET_64BIT && loongarch_pre_reload_split ()
+   && !reg_overlap_mentioned_p (operands[0], operands[3])
+   && INTVAL (operands[2]) != 0 && INTVAL (operands[4]) != 0
+   && UINTVAL (operands[2]) == (HOST_WIDE_INT_1U << INTVAL (operands[4])) - 1"
+  "#"
+  "&& true"
+  [(set (match_dup 0) (match_dup 1))
+   (set (zero_extract:DI (match_dup 0) (match_dup 2) (match_dup 4))
+       (match_dup 3))]
+  {
+    operands[2] = GEN_INT (64 - INTVAL (operands[4]));
+  })
+
 (define_insn "and_load_zero_extend<mode>"
   [(set (match_operand:X 0 "register_operand" "=r,r,r,r,r,r")
        (and:X (match_operand:X 1 "memory_operand" "%m,m,m,k,k,k")
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index 8460618b501..c9b0fdbbc54 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -135,6 +135,10 @@ (define_predicate "const_imm5_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), -16, 15)")))
 
+(define_predicate "const_uimm63_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 63)")))
+
 (define_predicate "const_imm10_operand"
   (and (match_code "const_int")
        (match_test "IMM10_OPERAND (INTVAL (op))")))
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-5.c b/gcc/testsuite/gcc.target/loongarch/bstrins-5.c
new file mode 100644
index 00000000000..199b16f9bb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "foo:.*\tbstrins\\.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,31,1.*foo" } } */
+/* { dg-final { scan-assembler "bar:.*\tbstrins\\.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,31,31.*bar" } } */
+
+int foo (int a, int b)
+{
+  return (a << 1) | (b & 1);
+}
+
+int bar (int a, int b)
+{
+  return (a << 31) | (b & 0x7fffffff);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-6.c b/gcc/testsuite/gcc.target/loongarch/bstrins-6.c
new file mode 100644
index 00000000000..f2fa3511780
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "foo:.*\tbstrins\\.d\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,63,1.*foo" } } */
+/* { dg-final { scan-assembler "bar:.*\tbstrins\\.d\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,63,15.*bar" } } */
+
+long foo (long a, long b)
+{
+  return (a << 1) | (b & 1);
+}
+
+short bar (short a, short b)
+{
+  return (a << 15) | (b & 0x7fff);
+}
-- 
2.20.1

[PATCH] LoongArch: Optimize statement to use bstrins.{w|d}

Reply via email to