https://gcc.gnu.org/g:d3b05ff815921ffe525787ed375665ef08a35135
commit r16-7006-gd3b05ff815921ffe525787ed375665ef08a35135 Author: Robin Dapp <[email protected]> Date: Fri Jan 23 11:51:57 2026 +0100 vect: Only scale vec_offset once [PR123767]. Since allowing "unsupported" scales by just multiplying, there was an issue with how the vec_offset was adjusted: For "real" gathers/scatters we have a separate vec_offset per stmt copy. For strided gather/scatter, however, there is just one vec_offset common to all copies. In case of an unsupported scale we need to multiply vec_offset with the required scale which is currently done like this: for (i = 0; i < num_vec; i++) vec_offset = vec_offset * scale_constant; where vec_offset is only different for real gathers/scatters. Thus, for more than one copy of a strided gather/scatter, we will erroneously multiply an already scaled vec_offset. This patch only performs the vec_offset scaling - for each copy in real gathers/scatters or - once for the first copy for strided gathers/scatters. PR tree-optimization/123767 gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_store): Only scale offset once. (vectorizable_load): Ditto. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/pr123767.c: New test. 
Diff: --- gcc/testsuite/gcc.target/aarch64/sve/pr123767.c | 35 +++++++++++++++++++++ gcc/tree-vect-stmts.cc | 42 +++++++++++++++---------- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c new file mode 100644 index 000000000000..5b123c057680 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-march=armv9-a -O3 -mmax-vectorization -msve-vector-bits=128 -mautovec-preference=sve-only -fdump-tree-vect-details" } */ + +struct partition_elem +{ + int class_element; + struct partition_elem* next; + unsigned class_count; +}; + +typedef struct partition_def +{ + int num_elements; + struct partition_elem elements[1]; +} *partition; + +partition part; + +partition +partition_new (int num_elements) +{ + int e; + + /* No need to allocate memory, just a compile test. */ + for (e = 0; e < num_elements; ++e) + { + part->elements[e].class_element = e; + part->elements[e].next = &(part->elements[e]); + part->elements[e].class_count = 1; + } + + return part; +} + +/* { dg-final { scan-tree-dump-not "\\{ 0, 576 \\}" "vect" } } */ diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 234a3aa4f4ba..ee98e72d1e57 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -8972,14 +8972,19 @@ vectorizable_store (vec_info *vinfo, (&stmts, ls.supported_offset_vectype, vec_offset); if (ls.supported_scale) { - tree mult_cst = build_int_cst - (TREE_TYPE (TREE_TYPE (vec_offset)), - SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale); - tree mult = build_vector_from_val - (TREE_TYPE (vec_offset), mult_cst); - vec_offset = gimple_build - (&stmts, MULT_EXPR, TREE_TYPE (vec_offset), - vec_offset, mult); + /* Only scale the vec_offset if we haven't already. 
*/ + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) + || j == 0) + { + tree mult_cst = build_int_cst + (TREE_TYPE (TREE_TYPE (vec_offset)), + SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale); + tree mult = build_vector_from_val + (TREE_TYPE (vec_offset), mult_cst); + vec_offset = gimple_build + (&stmts, MULT_EXPR, TREE_TYPE (vec_offset), + vec_offset, mult); + } scale = size_int (ls.supported_scale); } gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); @@ -10923,14 +10928,19 @@ vectorizable_load (vec_info *vinfo, (&stmts, ls.supported_offset_vectype, vec_offset); if (ls.supported_scale) { - tree mult_cst = build_int_cst - (TREE_TYPE (TREE_TYPE (vec_offset)), - SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale); - tree mult = build_vector_from_val - (TREE_TYPE (vec_offset), mult_cst); - vec_offset = gimple_build - (&stmts, MULT_EXPR, TREE_TYPE (vec_offset), - vec_offset, mult); + /* Only scale the vec_offset if we haven't already. */ + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) + || i == 0) + { + tree mult_cst = build_int_cst + (TREE_TYPE (TREE_TYPE (vec_offset)), + SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale); + tree mult = build_vector_from_val + (TREE_TYPE (vec_offset), mult_cst); + vec_offset = gimple_build + (&stmts, MULT_EXPR, TREE_TYPE (vec_offset), + vec_offset, mult); + } scale = size_int (ls.supported_scale); } gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
