If mask is a constant with value ((1 << N) - 1) << M we can perform this
optimization.

gcc/ChangeLog:

        PR target/111252
        * config/loongarch/loongarch-protos.h
        (loongarch_pre_reload_split): Declare new function.
        (loongarch_use_bstrins_for_ior_with_mask): Likewise.
        * config/loongarch/loongarch.cc
        (loongarch_pre_reload_split): Implement.
        (loongarch_use_bstrins_for_ior_with_mask): Likewise.
        * config/loongarch/predicates.md (ins_zero_bitmask_operand):
        New predicate.
        * config/loongarch/loongarch.md (bstrins_<mode>_for_mask):
        New define_insn_and_split.
        (bstrins_<mode>_for_ior_mask): Likewise.
        (define_peephole2): Further optimize code sequence produced by
        bstrins_<mode>_for_ior_mask if possible.

gcc/testsuite/ChangeLog:

        * g++.target/loongarch/bstrins-compile.C: New test.
        * g++.target/loongarch/bstrins-run.C: New test.
---
 gcc/config/loongarch/loongarch-protos.h       |  4 +-
 gcc/config/loongarch/loongarch.cc             | 36 ++++++++
 gcc/config/loongarch/loongarch.md             | 91 +++++++++++++++++++
 gcc/config/loongarch/predicates.md            |  8 ++
 .../g++.target/loongarch/bstrins-compile.C    | 22 +++++
 .../g++.target/loongarch/bstrins-run.C        | 65 +++++++++++++
 6 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/loongarch/bstrins-compile.C
 create mode 100644 gcc/testsuite/g++.target/loongarch/bstrins-run.C

diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index f4430d0d418..251011c5414 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -56,7 +56,7 @@ enum loongarch_symbol_type {
 };
 #define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1)
 
-/* Routines implemented in loongarch.c.  */
+/* Routines implemented in loongarch.cc.  */
 extern rtx loongarch_emit_move (rtx, rtx);
 extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
 extern void loongarch_expand_prologue (void);
@@ -163,6 +163,8 @@ extern const char *current_section_name (void);
 extern unsigned int current_section_flags (void);
 extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 extern bool loongarch_check_zero_div_p (void);
+extern bool loongarch_pre_reload_split (void);
+extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
 
 union loongarch_gen_fn_ptrs
 {
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index aeb37f0f2f7..6698414281e 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5482,6 +5482,55 @@ loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
   return true;
 }
 
+/* Predicate for pre-reload splitters with associated instructions,
+   which can match any time before the split1 pass (usually combine),
+   then are unconditionally split in that pass and should not be
+   matched again afterwards.  Return true while new pseudos can still
+   be created and the current function does not yet have the
+   PROP_rtl_split_insns property.  */
+
+bool
+loongarch_pre_reload_split (void)
+{
+  return (can_create_pseudo_p ()
+         && !(cfun->curr_properties & PROP_rtl_split_insns));
+}
+
+/* Check if we can use bstrins.<d> for
+   op0 = (op1 & op2) | (op3 & op4)
+   where op0, op1, op3 are regs, and op2, op4 are integer constants.
+   OP is the operand array, indexed as in the formula above.
+
+   Return 0 if bstrins.<d> cannot be used, 1 if it can be used with
+   op3 inserted into a copy of op1, and -1 if it can be used once
+   (op1, op2) and (op3, op4) have been exchanged.  */
+
+int
+loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
+{
+  unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
+  unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
+
+  /* The masks must be exact complements and neither may be zero.  */
+  if (mask1 != ~mask2 || !mask1 || !mask2)
+    return 0;
+
+  /* Try to avoid a right-shift: if low_bitmask_len accepts MASK1 as it
+     is, op1 can be inserted at bit 0.  */
+  if (low_bitmask_len (mode, mask1) != -1)
+    return -1;
+
+  /* Otherwise shift the mask down to bit 0 before asking low_bitmask_len:
+     prefer inserting op3 (MASK2), else fall back to op1 (MASK1).  */
+  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
+    return 1;
+
+  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
+    return -1;
+
+  return 0;
+}
+
 /* Print the text for PRINT_OPERAND punctation character CH to FILE.
    The punctuation characters are:
 
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 2308db16902..75f641b38ee 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1322,6 +1322,115 @@ (define_insn "and<mode>3_extended"
   [(set_attr "move_type" "pick_ins")
    (set_attr "mode" "<MODE>")])
 
+;; Clear one contiguous bit-field of a register.  An AND whose mask is
+;; accepted by ins_zero_bitmask_operand is split into a copy of the
+;; source followed by the insertion of zero into that field of the copy.
+(define_insn_and_split "*bstrins_<mode>_for_mask"
+  [(set (match_operand:GPR 0 "register_operand")
+       (and:GPR (match_operand:GPR 1 "register_operand")
+                (match_operand:GPR 2 "ins_zero_bitmask_operand")))]
+  ""
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 1))
+   (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 3))
+       (const_int 0))]
+  {
+    /* The bits to clear are those that are zero in the AND mask.  */
+    unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]);
+    int lo = ffs_hwi (mask) - 1;
+    int len = low_bitmask_len (<MODE>mode, mask >> lo);
+
+    /* Keep the field within the mode.  */
+    len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo);
+    operands[2] = GEN_INT (len);
+    operands[3] = GEN_INT (lo);
+  })
+
+;; Merge two registers under complementary constant masks:
+;;     op0 = (op1 & op2) | (op3 & op4)
+;; becomes a copy of one source into op0 followed by the insertion of a
+;; bit-field taken from the other source.  The insn condition restricts
+;; this to before the split1 pass (the splitter may create a new pseudo
+;; for the shifted source).
+(define_insn_and_split "*bstrins_<mode>_for_ior_mask"
+  [(set (match_operand:GPR 0 "register_operand")
+       (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand")
+                          (match_operand:GPR 2 "const_int_operand"))
+                (and:GPR (match_operand:GPR 3 "register_operand")
+                         (match_operand:GPR 4 "const_int_operand"))))]
+  "loongarch_pre_reload_split () && \
+   loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 1))
+   (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4))
+       (match_dup 3))]
+  {
+    /* A negative result asks for the two (register, mask) pairs to be
+       exchanged, so that operands[3] is always the inserted source.  */
+    if (loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands) < 0)
+      {
+       std::swap (operands[1], operands[3]);
+       std::swap (operands[2], operands[4]);
+      }
+
+    /* The field to insert covers the zero bits of operands[2].  */
+    unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]);
+    int lo = ffs_hwi (mask) - 1;
+    int len = low_bitmask_len (<MODE>mode, mask >> lo);
+
+    len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo);
+    operands[2] = GEN_INT (len);
+    operands[4] = GEN_INT (lo);
+
+    /* The insertion takes its value from the low bits of operands[3],
+       so bring the field down to bit 0 first.  */
+    if (lo)
+      {
+       rtx tmp = gen_reg_rtx (<MODE>mode);
+       emit_move_insn (tmp, gen_rtx_ASHIFTRT (<MODE>mode, operands[3],
+                                              GEN_INT (lo)));
+       operands[3] = tmp;
+      }
+  })
+
+;; We always avoid the shift operation in bstrins_<mode>_for_ior_mask
+;; if possible, but the result may be sub-optimal when one of the masks
+;; is (1 << N) - 1 and one of the src registers is the dest register.
+;; For example:
+;;     move            t0, a0
+;;     move            a0, a1
+;;     bstrins.d       a0, t0, 42, 0
+;;     ret
+;; using a shift operation would be better:
+;;     srai.d          t0, a1, 43
+;;     bstrins.d       a0, t0, 63, 43
+;;     ret
+;; unfortunately we cannot figure it out in split1: before reload we cannot
+;; know if the dest register is one of the src registers.  Fix it up in
+;; peephole2.
+(define_peephole2
+  [(set (match_operand:GPR 0 "register_operand")
+       (match_operand:GPR 1 "register_operand"))
+   (set (match_dup 1) (match_operand:GPR 2 "register_operand"))
+   (set (zero_extract:GPR (match_dup 1)
+                         (match_operand:SI 3 "const_int_operand")
+                         (const_int 0))
+       (match_dup 0))]
+  "peep2_reg_dead_p (3, operands[0])"
+  [(const_int 0)]
+  {
+    /* operands[3] is the length of the low field kept from the old
+       operands[1]; the bits above it come from operands[2].  */
+    int len = GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[3]);
+
+    emit_insn (gen_ashr<mode>3 (operands[0], operands[2], operands[3]));
+    emit_insn (gen_insv<mode> (operands[1], GEN_INT (len), operands[3],
+                              operands[0]));
+    DONE;
+  })
+
 (define_insn "*iorhi3"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
        (ior:HI (match_operand:HI 1 "register_operand" "%r,r")
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index f430629825e..499518b82ba 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -408,6 +408,19 @@ (define_predicate "fcc_reload_operand"
 (define_predicate "muldiv_target_operand"
                (match_operand 0 "register_operand"))
 
+;; A CONST_INT AND mask for which *bstrins_<mode>_for_mask is used.  It
+;; must not be all ones and must have bit 0 set, and its complement,
+;; with every bit below its lowest set bit filled in, must pass
+;; low_bitmask_len with a length above 12.  NOTE(review): 12 is presumably
+;; the width of the andi immediate, which covers shorter masks -- confirm.
+(define_predicate "ins_zero_bitmask_operand"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) != -1")
+       (match_test "INTVAL (op) & 1")
+       (match_test "low_bitmask_len (mode, \
+                                    ~UINTVAL (op) | (~UINTVAL (op) - 1)) \
+                   > 12")))
+
 (define_predicate "const_call_insn_operand"
   (match_code "const,symbol_ref,label_ref")
 {
diff --git a/gcc/testsuite/g++.target/loongarch/bstrins-compile.C b/gcc/testsuite/g++.target/loongarch/bstrins-compile.C
new file mode 100644
index 00000000000..3c0db1de4c6
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/bstrins-compile.C
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c++14 -O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\r\]*,7,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\r\]*,15,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\r\]*,31,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\r\]*,47,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\r\]*,3,0" } } */
+
+/* Each instantiation of test below is expected to produce a bstrins.d;
+   the directives above look for its "msb,lsb" operands.  The pattern is
+   confined to one line and starts at a comma so that, for instance,
+   "47,4" cannot satisfy the check for "7,4".  */
+
+typedef unsigned long u64;
+
+template <u64 mask>
+u64
+test (u64 a, u64 b)
+{
+  return (a & mask) | (b & ~mask);
+}
+
+template u64 test<0x0000'0000'0000'00f0l> (u64, u64);
+template u64 test<0x0000'0000'0000'fff0l> (u64, u64);
+template u64 test<0x0000'0000'ffff'fff0l> (u64, u64);
+template u64 test<0x0000'ffff'ffff'fff0l> (u64, u64);
+template u64 test<0xffff'ffff'ffff'fff0l> (u64, u64);
diff --git a/gcc/testsuite/g++.target/loongarch/bstrins-run.C b/gcc/testsuite/g++.target/loongarch/bstrins-run.C
new file mode 100644
index 00000000000..68913d5e0fc
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/bstrins-run.C
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Every expected value is computed in test1::run, where the mask is a
+   compile-time constant, and is then compared in test against the same
+   expression evaluated with the mask as an ordinary function argument.  */
+
+typedef unsigned long gr;
+
+/* mask<l, r>::value has exactly the bits l .. r - 1 set.  */
+template <int l, int r>
+struct mask {
+  enum { value = (1ul << r) - (1ul << l) };
+};
+
+/* r equal to the width of gr: written without a full-width shift.  */
+template <int l>
+struct mask<l, sizeof (gr) * __CHAR_BIT__> {
+  enum { value = -(1ul << l) };
+};
+
+/* Abort unless (A & MASK) | (B & ~MASK) equals OUT.  */
+__attribute__ ((noipa)) void
+test (gr a, gr b, gr mask, gr out)
+{
+  if (((a & mask) | (b & ~mask)) != out)
+    __builtin_abort ();
+}
+
+/* Return X unchanged from a noipa function.  */
+__attribute__ ((noipa)) gr
+no_optimize (gr x)
+{
+  return x;
+}
+
+/* test1<l, r>::run checks the mask for bits l .. r - 1 and its
+   complement, then moves on to r + 1.  */
+template <int l, int r>
+struct test1 {
+  static void
+  run (void)
+  {
+    gr m = mask<l, r>::value;
+    gr a = no_optimize (-1ul);
+    gr b = no_optimize (0);
+
+    test (a, b, m, (a & m) | (b & ~m));
+    test (a, b, ~m, (a & ~m) | (b & m));
+    test (a, 0, ~m, a & ~m);
+
+    test1<l, r + 1>::run ();
+  }
+};
+
+/* Stop once r exceeds the width of gr.  */
+template <int l>
+struct test1<l, sizeof (gr) * __CHAR_BIT__ + 1> {
+  static void run (void) {}
+};
+
+/* Run test1 for field start l, then continue with l + 1.  */
+template <int l>
+void
+test2 (void)
+{
+  test1<l, l + 1>::run ();
+  test2<l + 1> ();
+}
+
+/* Stop once l reaches the width of gr.  */
+template <> void test2<sizeof (gr) * __CHAR_BIT__> (void) {}
+
+int
+main ()
+{
+  test2<0> ();
+}
-- 
2.42.0

Reply via email to