The combiner attempts to optimize a zero-extension of a logical right shift using zero_extract. We already utilize this optimization for those cases that result in a single instructions. Let's add a insn_and_split pattern that also matches the generic case, where we can emit an optimized sequence of a slli/srli.
Tested with SPEC CPU 2017 (rv64gc). PR 111501 gcc/ChangeLog: * config/riscv/riscv.md (*lshr<GPR:mode>3_zero_extend_4): New pattern for zero-extraction. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr111501.c: New test. * gcc.target/riscv/zero-extend-rshift-32.c: New test. * gcc.target/riscv/zero-extend-rshift-64.c: New test. * gcc.target/riscv/zero-extend-rshift.c: New test. Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> --- gcc/config/riscv/riscv.md | 30 +++++ gcc/testsuite/gcc.target/riscv/pr111501.c | 32 +++++ .../gcc.target/riscv/zero-extend-rshift-32.c | 37 ++++++ .../gcc.target/riscv/zero-extend-rshift-64.c | 63 ++++++++++ .../gcc.target/riscv/zero-extend-rshift.c | 119 ++++++++++++++++++ 5 files changed, 281 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/pr111501.c create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index d4676507b45..80cbecb78e8 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3" [(set_attr "type" "shift") (set_attr "mode" "SI")]) +;; Canonical form for a zero-extend of a logical right shift. +;; Special cases are handled above. +;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb) +(define_insn_and_split "*lshr<GPR:mode>3_zero_extend_4" + [(set (match_operand:GPR 0 "register_operand" "=r") + (zero_extract:GPR + (match_operand:GPR 1 "register_operand" " r") + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand"))) + (clobber (match_scratch:GPR 4 "=&r"))] + "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1)) + && !TARGET_XTHEADBB" + "#" + "&& reload_completed" + [(set (match_dup 4) + (ashift:GPR (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (lshiftrt:GPR (match_dup 4) (match_dup 3)))] +{ + int regbits = GET_MODE_BITSIZE (GET_MODE (operands[0])).to_constant (); + int sizebits = INTVAL (operands[2]); + int startbits = INTVAL (operands[3]); + int lshamt = regbits - sizebits - startbits; + int rshamt = lshamt + startbits; + operands[2] = GEN_INT (lshamt); + operands[3] = GEN_INT (rshamt); +} + [(set_attr "type" "shift") + (set_attr "mode" "<GPR:MODE>")]) + ;; Handle AND with 2^N-1 for N from 12 to XLEN. This can be split into ;; two logical shifts. Otherwise it requires 3 instructions: lui, ;; xor/addi/srli, and. diff --git a/gcc/testsuite/gcc.target/riscv/pr111501.c b/gcc/testsuite/gcc.target/riscv/pr111501.c new file mode 100644 index 00000000000..9355be242e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr111501.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv64 } */ +/* { dg-options "-march=rv64gc" { target { rv64 } } } */ +/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ +/* { dg-allow-blank-lines-in-output 1 } */ + +/* +**do_shift: +** ... +** slli\ta[0-9],a[0-9],16 +** srli\ta[0-9],a[0-9],48 +** ... +*/ +unsigned int +do_shift(unsigned long csum) +{ + return (unsigned short)(csum >> 32); +} + +/* +**do_shift2: +** ... +** slli\ta[0-9],a[0-9],16 +** srli\ta[0-9],a[0-9],48 +** ... +*/ +unsigned int +do_shift2(unsigned long csum) +{ + return (csum << 16) >> 48; +} diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c new file mode 100644 index 00000000000..2824d6fe074 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv32 } */ +/* { dg-options "-march=rv32gc" } */ +/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT) \ +unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v) \ +{ \ + return (unsigned CT)(v >> N); \ +} + +#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long) +#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long) + +/* +**ulong_ze_ushort_rshift_9_ulong: +** slli\ta[0-9],a[0-9],7 +** srli\ta[0-9],a[0-9],16 +** ret +*/ +ULONG_ZE_USHORT_RSHIFT_N_ULONG(9) + +/* +**ulong_ze_ushort_rshift_14_ulong: +** slli\ta[0-9],a[0-9],2 +** srli\ta[0-9],a[0-9],16 +** ret +*/ +ULONG_ZE_USHORT_RSHIFT_N_ULONG(14) + +/* +**ulong_ze_uint_rshift_23_ulong: +** srli\ta[0-9],a[0-9],23 +** ret +*/ +ULONG_ZE_UINT_RSHIFT_N_ULONG(23) diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c new file mode 100644 index 00000000000..ec5c2745561 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv64 } */ +/* { dg-options "-march=rv64gc" } */ +/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT) \ +unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v) \ +{ \ + return (unsigned CT)(v >> N); \ +} + +#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long) +#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long) +#define UINT_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,int) +#define ULONG_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,int) + +/* +**ulong_ze_ushort_rshift_9_ulong: +** slli\ta[0-9],a[0-9],39 +** srli\ta[0-9],a[0-9],48 +** ret +*/ +ULONG_ZE_USHORT_RSHIFT_N_ULONG(9) + +/* +**ulong_ze_ushort_rshift_14_ulong: +** slli\ta[0-9],a[0-9],34 +** srli\ta[0-9],a[0-9],48 +** ret +*/ +ULONG_ZE_USHORT_RSHIFT_N_ULONG(14) + +/* +**ulong_ze_ushort_rshift_51_ulong: +** srli\ta[0-9],a[0-9],51 +** ret +*/ +ULONG_ZE_USHORT_RSHIFT_N_ULONG(51) + +/* +**ulong_ze_uint_rshift_23_ulong: +** slli\ta[0-9],a[0-9],9 +** srli\ta[0-9],a[0-9],32 +** ret +*/ +ULONG_ZE_UINT_RSHIFT_N_ULONG(23) + +/* +**uint_ze_ushort_rshift_15_uint: +** slli\ta[0-9],a[0-9],33 +** srli\ta[0-9],a[0-9],48 +** ret +*/ +UINT_ZE_USHORT_RSHIFT_N_UINT(15) + +/* +**ulong_ze_ushort_rshift_15_uint: +** slli\ta[0-9],a[0-9],33 +** srli\ta[0-9],a[0-9],48 +** ret +*/ +ULONG_ZE_USHORT_RSHIFT_N_UINT(15) diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c new file mode 100644 index 00000000000..706264c8ff1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c @@ -0,0 +1,119 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc" { target { rv32 } } } */ +/* { dg-options "-march=rv64gc" { target { rv64 } } } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +// Tests for merging rshifts into zero-extensions. +// u8-casts are not tested as they can be done with one instruction (andi 0xff). + +#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT) \ +unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v) \ +{ \ + return (unsigned CT)(v >> N); \ +} + +#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long) + +// Below "slli (16-N); srli 16" for rv32 +// Below "slli ((32+16)-N); srli (32+16)" for rv64 +ULONG_ZE_USHORT_RSHIFT_N_ULONG(1) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(7) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(8) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(9) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(15) +// Below "srli 16" for rv32 +// Below "srliw 16" for rv64 +ULONG_ZE_USHORT_RSHIFT_N_ULONG(16) +// Below "srli N" for rv32 +// Below "slli ((32+16)-N); srli (32+16)" for rv64 +ULONG_ZE_USHORT_RSHIFT_N_ULONG(17) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(23) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(24) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(25) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(31) +// Below compiler warning for rv32 +#if __riscv_xlen == 64 +// Below "slli ((32+16)-N); srli (32+16)" for rv64 +ULONG_ZE_USHORT_RSHIFT_N_ULONG(32) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(33) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(39) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(40) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(41) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(47) +// Below "srli N" for rv64 +ULONG_ZE_USHORT_RSHIFT_N_ULONG(48) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(49) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(55) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(56) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(57) +ULONG_ZE_USHORT_RSHIFT_N_ULONG(63) +#endif /* __riscv_xlen == 64 */ + +#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long) + +// Below "srli N" for rv32 +// Below "slli (32-N); srli 32" for rv64 +ULONG_ZE_UINT_RSHIFT_N_ULONG(1) +ULONG_ZE_UINT_RSHIFT_N_ULONG(7) +ULONG_ZE_UINT_RSHIFT_N_ULONG(8) +ULONG_ZE_UINT_RSHIFT_N_ULONG(9) +ULONG_ZE_UINT_RSHIFT_N_ULONG(15) +ULONG_ZE_UINT_RSHIFT_N_ULONG(16) +ULONG_ZE_UINT_RSHIFT_N_ULONG(17) +ULONG_ZE_UINT_RSHIFT_N_ULONG(23) +ULONG_ZE_UINT_RSHIFT_N_ULONG(24) +ULONG_ZE_UINT_RSHIFT_N_ULONG(25) +ULONG_ZE_UINT_RSHIFT_N_ULONG(31) +// Below compiler warning for rv32 +#if __riscv_xlen == 64 +// Below "srli N" for rv64 +ULONG_ZE_UINT_RSHIFT_N_ULONG(32) +ULONG_ZE_UINT_RSHIFT_N_ULONG(33) +ULONG_ZE_UINT_RSHIFT_N_ULONG(39) +ULONG_ZE_UINT_RSHIFT_N_ULONG(40) +ULONG_ZE_UINT_RSHIFT_N_ULONG(41) +ULONG_ZE_UINT_RSHIFT_N_ULONG(47) +ULONG_ZE_UINT_RSHIFT_N_ULONG(48) +ULONG_ZE_UINT_RSHIFT_N_ULONG(49) +ULONG_ZE_UINT_RSHIFT_N_ULONG(55) +ULONG_ZE_UINT_RSHIFT_N_ULONG(56) +ULONG_ZE_UINT_RSHIFT_N_ULONG(57) +ULONG_ZE_UINT_RSHIFT_N_ULONG(63) +#endif /* __riscv_xlen == 64 */ + +#define UINT_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,int) + +#if __riscv_xlen == 64 +// Below "slli ((32+16)-N); srli (32+16)" for rv64 +UINT_ZE_USHORT_RSHIFT_N_UINT(1) +UINT_ZE_USHORT_RSHIFT_N_UINT(7) +UINT_ZE_USHORT_RSHIFT_N_UINT(8) +UINT_ZE_USHORT_RSHIFT_N_UINT(9) +UINT_ZE_USHORT_RSHIFT_N_UINT(15) +// Below "srliw N" for rv64 +UINT_ZE_USHORT_RSHIFT_N_UINT(16) +UINT_ZE_USHORT_RSHIFT_N_UINT(17) +UINT_ZE_USHORT_RSHIFT_N_UINT(23) +UINT_ZE_USHORT_RSHIFT_N_UINT(24) +UINT_ZE_USHORT_RSHIFT_N_UINT(25) +UINT_ZE_USHORT_RSHIFT_N_UINT(31) +#endif /* __riscv_xlen == 64 */ + +#define UINT_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,long) +// Below "slli (16-N); srli 16" for rv32 +// Below "slli ((32+16)-N); srli (32+16)" for rv64 +UINT_ZE_USHORT_RSHIFT_N_ULONG(9) +UINT_ZE_USHORT_RSHIFT_N_ULONG(15) + +#define ULONG_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,int) +// Below "slli (16-N); srli 16" for rv32 +// Below "slli ((32+16)-N); srli (32+16)" for rv64 +ULONG_ZE_USHORT_RSHIFT_N_UINT(9) +ULONG_ZE_USHORT_RSHIFT_N_UINT(15) + +/* { dg-final { scan-assembler-times "slli\t" 9 { target { rv32 } } } } */ +/* { dg-final { scan-assembler-times "srli\t" 26 { target { rv32 } } } } */ + +/* { dg-final { scan-assembler-times "slli\t" 36 { target { rv64 } } } } */ +/* { dg-final { scan-assembler-times "srli\t" 54 { target { rv64 } } } } */ +/* { dg-final { scan-assembler-times "srliw\t" 7 { target { rv64 } } } } */ -- 2.44.0