Re: [PATCH] RISC-V: Fix vec_init for simple sequences [PR114028].
> +/* { dg-final { scan-assembler-times "vmv\.v\.i\tv\[0-9\],0" 0 } } */ > > I think you should use "scan-assembler-not". Thanks, I'm going to commit with that change. Regards, Robin
Re: [PATCH] RISC-V: Fix vec_init for simple sequences [PR114028].
Sorry, I missed reviewing the testcase: +/* { dg-final { scan-assembler-times "vmv\.v\.i\tv\[0-9\],0" 0 } } */ I think you should use "scan-assembler-not". juzhe.zh...@rivai.ai From: Robin Dapp Date: 2024-02-23 04:02 To: gcc-patches; palmer; Kito Cheng; juzhe.zh...@rivai.ai CC: rdapp.gcc; jeffreyalaw Subject: [PATCH] RISC-V: Fix vec_init for simple sequences [PR114028]. Hi, for a vec_init (_a, _a, _a, _a) with _a of mode DImode we try to construct a "superword" of two "_a"s. This only works for modes < Pmode when we can "shift and or" two halves into one Pmode register. This patch disallows the optimization for inner_mode == Pmode and emits a simple broadcast in such a case. The test is not a run test because it requires vlen=256 in qemu. I can still adjust that, of course. Regtested on rv64, rv32 still running. Regards Robin gcc/ChangeLog: PR target/114028 * config/riscv/riscv-v.cc (rvv_builder::can_duplicate_repeating_sequence_p): Return false if inner mode is already Pmode. (rvv_builder::is_repeating_sequence): New function. (expand_vec_init): Emit broadcast if sequence is all same. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr114028.c: New test. 
--- gcc/config/riscv/riscv-v.cc | 25 ++- .../gcc.target/riscv/rvv/autovec/pr114028.c | 25 +++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 0cfbd21ce6f..29d58deb995 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -443,6 +443,7 @@ public: } bool can_duplicate_repeating_sequence_p (); + bool is_repeating_sequence (); rtx get_merged_repeating_sequence (); bool repeating_sequence_use_merge_profitable_p (); @@ -483,7 +484,8 @@ rvv_builder::can_duplicate_repeating_sequence_p () { poly_uint64 new_size = exact_div (full_nelts (), npatterns ()); unsigned int new_inner_size = m_inner_bits_size * npatterns (); - if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode) + if (m_inner_mode == Pmode + || !int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode) || GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD || !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode)) return false; @@ -492,6 +494,18 @@ rvv_builder::can_duplicate_repeating_sequence_p () return nelts_per_pattern () == 1; } +/* Return true if the vector is a simple sequence with one pattern and all + elements the same. */ +bool +rvv_builder::is_repeating_sequence () +{ + if (npatterns () > 1) +return false; + if (full_nelts ().is_constant ()) +return repeating_sequence_p (0, full_nelts ().to_constant (), 1); + return nelts_per_pattern () == 1; +} + /* Return true if it is a repeating sequence that using merge approach has better codegen than using default approach (slide1down). @@ -2544,6 +2558,15 @@ expand_vec_init (rtx target, rtx vals) v.quick_push (XVECEXP (vals, 0, i)); v.finalize (); + /* If the sequence is v = { a, a, a, a } just broadcast an element. 
*/ + if (v.is_repeating_sequence ()) +{ + machine_mode mode = GET_MODE (target); + rtx dup = expand_vector_broadcast (mode, v.elt (0)); + emit_move_insn (target, dup); + return; +} + if (nelts > 3) { /* Case 1: Convert v = { a, b, a, b } into v = { ab, ab }. */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c new file mode 100644 index 000..a451d85e3fe --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -O3" } */ + +int a, d = 55003; +long c = 0, h; +long e = 1; +short i; + +int +main () +{ + for (int g = 0; g < 16; g++) +{ + d |= c; + short l = d; + i = l < 0 || a >> 4 ? d : a; + h = i - 8L; + e &= h; +} + + if (e != 1) +__builtin_abort (); +} + +/* { dg-final { scan-assembler-times "vmv\.v\.i\tv\[0-9\],0" 0 } } */ -- 2.43.2
Re: [PATCH] RISC-V: Fix vec_init for simple sequences [PR114028].
lgtm. juzhe.zh...@rivai.ai From: Robin Dapp Date: 2024-02-23 04:02 To: gcc-patches; palmer; Kito Cheng; juzhe.zh...@rivai.ai CC: rdapp.gcc; jeffreyalaw Subject: [PATCH] RISC-V: Fix vec_init for simple sequences [PR114028]. Hi, for a vec_init (_a, _a, _a, _a) with _a of mode DImode we try to construct a "superword" of two "_a"s. This only works for modes < Pmode when we can "shift and or" two halves into one Pmode register. This patch disallows the optimization for inner_mode == Pmode and emits a simple broadcast in such a case. The test is not a run test because it requires vlen=256 in qemu. I can still adjust that, of course. Regtested on rv64, rv32 still running. Regards Robin gcc/ChangeLog: PR target/114028 * config/riscv/riscv-v.cc (rvv_builder::can_duplicate_repeating_sequence_p): Return false if inner mode is already Pmode. (rvv_builder::is_repeating_sequence): New function. (expand_vec_init): Emit broadcast if sequence is all same. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr114028.c: New test. 
--- gcc/config/riscv/riscv-v.cc | 25 ++- .../gcc.target/riscv/rvv/autovec/pr114028.c | 25 +++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 0cfbd21ce6f..29d58deb995 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -443,6 +443,7 @@ public: } bool can_duplicate_repeating_sequence_p (); + bool is_repeating_sequence (); rtx get_merged_repeating_sequence (); bool repeating_sequence_use_merge_profitable_p (); @@ -483,7 +484,8 @@ rvv_builder::can_duplicate_repeating_sequence_p () { poly_uint64 new_size = exact_div (full_nelts (), npatterns ()); unsigned int new_inner_size = m_inner_bits_size * npatterns (); - if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode) + if (m_inner_mode == Pmode + || !int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode) || GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD || !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode)) return false; @@ -492,6 +494,18 @@ rvv_builder::can_duplicate_repeating_sequence_p () return nelts_per_pattern () == 1; } +/* Return true if the vector is a simple sequence with one pattern and all + elements the same. */ +bool +rvv_builder::is_repeating_sequence () +{ + if (npatterns () > 1) +return false; + if (full_nelts ().is_constant ()) +return repeating_sequence_p (0, full_nelts ().to_constant (), 1); + return nelts_per_pattern () == 1; +} + /* Return true if it is a repeating sequence that using merge approach has better codegen than using default approach (slide1down). @@ -2544,6 +2558,15 @@ expand_vec_init (rtx target, rtx vals) v.quick_push (XVECEXP (vals, 0, i)); v.finalize (); + /* If the sequence is v = { a, a, a, a } just broadcast an element. 
*/ + if (v.is_repeating_sequence ()) +{ + machine_mode mode = GET_MODE (target); + rtx dup = expand_vector_broadcast (mode, v.elt (0)); + emit_move_insn (target, dup); + return; +} + if (nelts > 3) { /* Case 1: Convert v = { a, b, a, b } into v = { ab, ab }. */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c new file mode 100644 index 000..a451d85e3fe --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -O3" } */ + +int a, d = 55003; +long c = 0, h; +long e = 1; +short i; + +int +main () +{ + for (int g = 0; g < 16; g++) +{ + d |= c; + short l = d; + i = l < 0 || a >> 4 ? d : a; + h = i - 8L; + e &= h; +} + + if (e != 1) +__builtin_abort (); +} + +/* { dg-final { scan-assembler-times "vmv\.v\.i\tv\[0-9\],0" 0 } } */ -- 2.43.2
[PATCH] RISC-V: Fix vec_init for simple sequences [PR114028].
Hi, for a vec_init (_a, _a, _a, _a) with _a of mode DImode we try to construct a "superword" of two "_a"s. This only works for modes < Pmode when we can "shift and or" two halves into one Pmode register. This patch disallows the optimization for inner_mode == Pmode and emits a simple broadcast in such a case. The test is not a run test because it requires vlen=256 in qemu. I can still adjust that, of course. Regtested on rv64, rv32 still running. Regards Robin gcc/ChangeLog: PR target/114028 * config/riscv/riscv-v.cc (rvv_builder::can_duplicate_repeating_sequence_p): Return false if inner mode is already Pmode. (rvv_builder::is_repeating_sequence): New function. (expand_vec_init): Emit broadcast if sequence is all same. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr114028.c: New test. --- gcc/config/riscv/riscv-v.cc | 25 ++- .../gcc.target/riscv/rvv/autovec/pr114028.c | 25 +++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 0cfbd21ce6f..29d58deb995 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -443,6 +443,7 @@ public: } bool can_duplicate_repeating_sequence_p (); + bool is_repeating_sequence (); rtx get_merged_repeating_sequence (); bool repeating_sequence_use_merge_profitable_p (); @@ -483,7 +484,8 @@ rvv_builder::can_duplicate_repeating_sequence_p () { poly_uint64 new_size = exact_div (full_nelts (), npatterns ()); unsigned int new_inner_size = m_inner_bits_size * npatterns (); - if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode) + if (m_inner_mode == Pmode + || !int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode) || GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD || !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode)) return false; @@ -492,6 +494,18 @@ rvv_builder::can_duplicate_repeating_sequence_p () return 
nelts_per_pattern () == 1; } +/* Return true if the vector is a simple sequence with one pattern and all + elements the same. */ +bool +rvv_builder::is_repeating_sequence () +{ + if (npatterns () > 1) +return false; + if (full_nelts ().is_constant ()) +return repeating_sequence_p (0, full_nelts ().to_constant (), 1); + return nelts_per_pattern () == 1; +} + /* Return true if it is a repeating sequence that using merge approach has better codegen than using default approach (slide1down). @@ -2544,6 +2558,15 @@ expand_vec_init (rtx target, rtx vals) v.quick_push (XVECEXP (vals, 0, i)); v.finalize (); + /* If the sequence is v = { a, a, a, a } just broadcast an element. */ + if (v.is_repeating_sequence ()) +{ + machine_mode mode = GET_MODE (target); + rtx dup = expand_vector_broadcast (mode, v.elt (0)); + emit_move_insn (target, dup); + return; +} + if (nelts > 3) { /* Case 1: Convert v = { a, b, a, b } into v = { ab, ab }. */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c new file mode 100644 index 000..a451d85e3fe --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -O3" } */ + +int a, d = 55003; +long c = 0, h; +long e = 1; +short i; + +int +main () +{ + for (int g = 0; g < 16; g++) +{ + d |= c; + short l = d; + i = l < 0 || a >> 4 ? d : a; + h = i - 8L; + e &= h; +} + + if (e != 1) +__builtin_abort (); +} + +/* { dg-final { scan-assembler-times "vmv\.v\.i\tv\[0-9\],0" 0 } } */ -- 2.43.2