https://gcc.gnu.org/g:fcde4c81644aecbb2b03f0b2f470a4d1116706cc
commit r16-4742-gfcde4c81644aecbb2b03f0b2f470a4d1116706cc Author: Andrew Pinski <[email protected]> Date: Wed Oct 29 11:58:31 2025 -0700 MATCH: Optimize `VEC_SHL_INSERT (dup (A), A)` to just `dup (A)` [PR116075] It was noticed that if we have `.VEC_SHL_INSERT ({ 0, ... }, 0)`, it was not being simplified to just `{ 0, ... }`. This was generated from the autovectorizer (maybe even by accident, see PR tree-optimization/116081). This adds a few SVE testcases to see if this is optimized since the auto-vectorizer or intrinsics are the only two ways of getting this produced. Changes since: * v1: Move the constant case over to fold-const-call.cc. Simplify match pattern to handle vec_duplicate. Built and tested for aarch64-linux-gnu with no regressions. PR target/116075 gcc/ChangeLog: * fold-const-call.cc (fold_const_vec_shl_insert): New function. (fold_const_call): Call fold_const_vec_shl_insert for CFN_VEC_SHL_INSERT. * match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/dup-insr-1.c: New test. * gcc.target/aarch64/sve/dup-insr-2.c: New test. Signed-off-by: Andrew Pinski <[email protected]> Diff: --- gcc/fold-const-call.cc | 22 +++++++++++++++++++ gcc/match.pd | 5 +++++ gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c | 26 +++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c | 26 +++++++++++++++++++++++ 4 files changed, 79 insertions(+) diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc index 439bf8044f5e..89c1c28b60d6 100644 --- a/gcc/fold-const-call.cc +++ b/gcc/fold-const-call.cc @@ -1440,6 +1440,25 @@ fold_const_fold_left (tree type, tree arg0, tree arg1, tree_code code) return arg0; } +/* Fold a call to IFN_VEC_SHL_INSERT (ARG0, ARG1), returning a value + of type TYPE. */ + +static tree +fold_const_vec_shl_insert (tree, tree arg0, tree arg1) +{ + if (TREE_CODE (arg0) != VECTOR_CST) + return NULL_TREE; + + /* vec_shl_insert ( dup(CST), CST) -> dup (CST). 
*/ + if (tree elem = uniform_vector_p (arg0)) + { + if (operand_equal_p (elem, arg1)) + return arg0; + } + + return NULL_TREE; +} + /* Try to evaluate: *RESULT = FN (*ARG0, *ARG1) @@ -1843,6 +1862,9 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1) case CFN_FOLD_LEFT_PLUS: return fold_const_fold_left (type, arg0, arg1, PLUS_EXPR); + case CFN_VEC_SHL_INSERT: + return fold_const_vec_shl_insert (type, arg0, arg1); + case CFN_UBSAN_CHECK_ADD: case CFN_ADD_OVERFLOW: subcode = PLUS_EXPR; diff --git a/gcc/match.pd b/gcc/match.pd index b65adece8dc6..6aaf80eee7d5 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -12023,3 +12023,8 @@ and, && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, OPTIMIZE_FOR_BOTH)) (IFN_AVG_CEIL @0 @2))) #endif + +/* vec shift left insert (dup (A), A) -> dup(A) */ +(simplify + (IFN_VEC_SHL_INSERT (vec_duplicate@1 @0) @0) + @1) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c new file mode 100644 index 000000000000..41dcbba45cf2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ +/* PR target/116075 */ + +#include <arm_sve.h> + +svint8_t f(void) +{ + svint8_t tt; + tt = svdup_s8 (0); + tt = svinsr (tt, 0); + return tt; +} + +svint8_t f1(int8_t t) +{ + svint8_t tt; + tt = svdup_s8 (t); + tt = svinsr (tt, t); + return tt; +} + +/* The above 2 functions should have removed the VEC_SHL_INSERT. 
*/ + +/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c new file mode 100644 index 000000000000..8eafe9746241 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ +/* PR target/116075 */ + +#include <arm_sve.h> + +svint8_t f(int8_t t) +{ + svint8_t tt; + tt = svdup_s8 (0); + tt = svinsr (tt, t); + return tt; +} + +svint8_t f1(int8_t t) +{ + svint8_t tt; + tt = svdup_s8 (t); + tt = svinsr (tt, 0); + return tt; +} + +/* The above 2 functions should not have removed the VEC_SHL_INSERT. */ + +/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */ +
