The combiner attempts to optimize a zero-extension of a logical right shift
using zero_extract. We already utilize this optimization for those cases
that result in a single instruction.  Let's add an insn_and_split
pattern that also matches the generic case, where we can emit an
optimized slli/srli sequence.

Tested with SPEC CPU 2017 (rv64gc).

        PR 111501

gcc/ChangeLog:

        * config/riscv/riscv.md (*lshr<GPR:mode>3_zero_extend_4): New
        pattern for zero-extraction.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/pr111501.c: New test.
        * gcc.target/riscv/zero-extend-rshift-32.c: New test.
        * gcc.target/riscv/zero-extend-rshift-64.c: New test.
        * gcc.target/riscv/zero-extend-rshift.c: New test.

Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu>
---
 gcc/config/riscv/riscv.md                     |  30 +++++
 gcc/testsuite/gcc.target/riscv/pr111501.c     |  32 +++++
 .../gcc.target/riscv/zero-extend-rshift-32.c  |  37 ++++++
 .../gcc.target/riscv/zero-extend-rshift-64.c  |  63 ++++++++++
 .../gcc.target/riscv/zero-extend-rshift.c     | 119 ++++++++++++++++++
 5 files changed, 281 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr111501.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index d4676507b45..80cbecb78e8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3"
   [(set_attr "type" "shift")
    (set_attr "mode" "SI")])
 
+;; Canonical form for a zero-extend of a logical right shift.
+;; Special cases are handled above.
+;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
+(define_insn_and_split "*lshr<GPR:mode>3_zero_extend_4"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+        (zero_extract:GPR
+       (match_operand:GPR 1 "register_operand" " r")
+       (match_operand     2 "const_int_operand")
+       (match_operand     3 "const_int_operand")))
+   (clobber (match_scratch:GPR  4 "=&r"))]
+  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
+   && !TARGET_XTHEADBB"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+     (ashift:GPR (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+     (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
+{
+  int regbits = GET_MODE_BITSIZE (GET_MODE (operands[0])).to_constant ();
+  int sizebits = INTVAL (operands[2]);
+  int startbits = INTVAL (operands[3]);
+  int lshamt = regbits - sizebits - startbits;
+  int rshamt = lshamt + startbits;
+  operands[2] = GEN_INT (lshamt);
+  operands[3] = GEN_INT (rshamt);
+}
+  [(set_attr "type" "shift")
+   (set_attr "mode" "<GPR:MODE>")])
+
 ;; Handle AND with 2^N-1 for N from 12 to XLEN.  This can be split into
 ;; two logical shifts.  Otherwise it requires 3 instructions: lui,
 ;; xor/addi/srli, and.
diff --git a/gcc/testsuite/gcc.target/riscv/pr111501.c b/gcc/testsuite/gcc.target/riscv/pr111501.c
new file mode 100644
index 00000000000..9355be242e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr111501.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-allow-blank-lines-in-output 1 } */
+
+/*
+**do_shift:
+**    ...
+**    slli\ta[0-9],a[0-9],16
+**    srli\ta[0-9],a[0-9],48
+**    ...
+*/
+unsigned int
+do_shift(unsigned long csum)
+{
+  return (unsigned short)(csum >> 32);
+}
+
+/*
+**do_shift2:
+**    ...
+**    slli\ta[0-9],a[0-9],16
+**    srli\ta[0-9],a[0-9],48
+**    ...
+*/
+unsigned int
+do_shift2(unsigned long csum)
+{
+  return (csum << 16) >> 48;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
new file mode 100644
index 00000000000..2824d6fe074
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-options "-march=rv32gc" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT)                            \
+unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v)       \
+{                                                                      \
+    return (unsigned CT)(v >> N);                                      \
+}
+
+#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long)
+#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long)
+
+/*
+**ulong_ze_ushort_rshift_9_ulong:
+**    slli\ta[0-9],a[0-9],7
+**    srli\ta[0-9],a[0-9],16
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(9)
+
+/*
+**ulong_ze_ushort_rshift_14_ulong:
+**    slli\ta[0-9],a[0-9],2
+**    srli\ta[0-9],a[0-9],16
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(14)
+
+/*
+**ulong_ze_uint_rshift_23_ulong:
+**    srli\ta[0-9],a[0-9],23
+**    ret
+*/
+ULONG_ZE_UINT_RSHIFT_N_ULONG(23)
diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
new file mode 100644
index 00000000000..ec5c2745561
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-options "-march=rv64gc" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT)                            \
+unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v)       \
+{                                                                      \
+    return (unsigned CT)(v >> N);                                      \
+}
+
+#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long)
+#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long)
+#define UINT_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,int)
+#define ULONG_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,int)
+
+/*
+**ulong_ze_ushort_rshift_9_ulong:
+**    slli\ta[0-9],a[0-9],39
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(9)
+
+/*
+**ulong_ze_ushort_rshift_14_ulong:
+**    slli\ta[0-9],a[0-9],34
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(14)
+
+/*
+**ulong_ze_ushort_rshift_51_ulong:
+**    srli\ta[0-9],a[0-9],51
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(51)
+
+/*
+**ulong_ze_uint_rshift_23_ulong:
+**    slli\ta[0-9],a[0-9],9
+**    srli\ta[0-9],a[0-9],32
+**    ret
+*/
+ULONG_ZE_UINT_RSHIFT_N_ULONG(23)
+
+/*
+**uint_ze_ushort_rshift_15_uint:
+**    slli\ta[0-9],a[0-9],33
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+UINT_ZE_USHORT_RSHIFT_N_UINT(15)
+
+/*
+**ulong_ze_ushort_rshift_15_uint:
+**    slli\ta[0-9],a[0-9],33
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_UINT(15)
diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c
new file mode 100644
index 00000000000..706264c8ff1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+// Tests for merging rshifts into zero-extensions.
+// u8-casts are not tested as they can be done with one instruction (andi 0xff).
+
+#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT)                            \
+unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v)       \
+{                                                                      \
+    return (unsigned CT)(v >> N);                                      \
+}
+
+#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long)
+
+// Below "slli (16-N); srli 16" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(1)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(7)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(8)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(9)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(15)
+// Below "srli 16" for rv32
+// Below "srliw 16" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(16)
+// Below "srli N" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(17)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(23)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(24)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(25)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(31)
+// Below compiler warning for rv32
+#if __riscv_xlen == 64
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(32)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(33)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(39)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(40)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(41)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(47)
+// Below "srli N" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(48)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(49)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(55)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(56)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(57)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(63)
+#endif /* __riscv_xlen == 64 */
+
+#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long)
+
+// Below "srli N" for rv32
+// Below "slli (32-N); srli 32" for rv64
+ULONG_ZE_UINT_RSHIFT_N_ULONG(1)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(7)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(8)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(9)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(15)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(16)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(17)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(23)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(24)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(25)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(31)
+// Below compiler warning for rv32
+#if __riscv_xlen == 64
+// Below "srli N" for rv64
+ULONG_ZE_UINT_RSHIFT_N_ULONG(32)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(33)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(39)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(40)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(41)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(47)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(48)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(49)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(55)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(56)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(57)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(63)
+#endif /* __riscv_xlen == 64 */
+
+#define UINT_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,int)
+
+#if __riscv_xlen == 64
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+UINT_ZE_USHORT_RSHIFT_N_UINT(1)
+UINT_ZE_USHORT_RSHIFT_N_UINT(7)
+UINT_ZE_USHORT_RSHIFT_N_UINT(8)
+UINT_ZE_USHORT_RSHIFT_N_UINT(9)
+UINT_ZE_USHORT_RSHIFT_N_UINT(15)
+// Below "srliw N" for rv64
+UINT_ZE_USHORT_RSHIFT_N_UINT(16)
+UINT_ZE_USHORT_RSHIFT_N_UINT(17)
+UINT_ZE_USHORT_RSHIFT_N_UINT(23)
+UINT_ZE_USHORT_RSHIFT_N_UINT(24)
+UINT_ZE_USHORT_RSHIFT_N_UINT(25)
+UINT_ZE_USHORT_RSHIFT_N_UINT(31)
+#endif /* __riscv_xlen == 64 */
+
+#define UINT_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,long)
+// Below "slli (16-N); srli 16" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+UINT_ZE_USHORT_RSHIFT_N_ULONG(9)
+UINT_ZE_USHORT_RSHIFT_N_ULONG(15)
+
+#define ULONG_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,int)
+// Below "slli (16-N); srli 16" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_UINT(9)
+ULONG_ZE_USHORT_RSHIFT_N_UINT(15)
+
+/* { dg-final { scan-assembler-times "slli\t" 9 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "srli\t" 26 { target { rv32 } } } } */
+
+/* { dg-final { scan-assembler-times "slli\t" 36 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "srli\t" 54 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "srliw\t" 7 { target { rv64 } } } } */
-- 
2.44.0

Reply via email to