On Wed, 18 Oct 2023 at 23:22, Richard Sandiford <richard.sandif...@arm.com> wrote: > > Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: > > On Tue, 17 Oct 2023 at 02:40, Richard Sandiford > > <richard.sandif...@arm.com> wrote: > >> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: > >> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc > >> > index 4f8561509ff..55a6a68c16c 100644 > >> > --- a/gcc/fold-const.cc > >> > +++ b/gcc/fold-const.cc > >> > @@ -10684,9 +10684,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, > >> > tree arg1, > >> > > >> > /* Ensure that the stepped sequence always selects from the same > >> > input pattern. */ > >> > - unsigned arg_npatterns > >> > - = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0) > >> > - : VECTOR_CST_NPATTERNS (arg1); > >> > + tree arg = ((q1 & 1) == 0) ? arg0 : arg1; > >> > + unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg); > >> > > >> > if (!multiple_p (step, arg_npatterns)) > >> > { > >> > @@ -10694,6 +10693,29 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, > >> > tree arg1, > >> > *reason = "step is not multiple of npatterns"; > >> > return false; > >> > } > >> > + > >> > + /* If a1 chooses base element from arg, ensure that it's a natural > >> > + stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0]) > >> > + to preserve arg's encoding. */ > >> > + > >> > + unsigned HOST_WIDE_INT index; > >> > + if (!r1.is_constant (&index)) > >> > + return false; > >> > + if (index < arg_npatterns) > >> > + { > >> > >> I don't know whether it matters in practice, but I think the two conditions > >> above are more natural as: > >> > >> if (maybe_lt (r1, arg_npatterns)) > >> { > >> unsigned HOST_WIDE_INT index; > >> if (!r1.is_constant (&index)) > >> return false; > >> > >> ...[code below]... > >> } > >> > >> > + tree arg_elem0 = vector_cst_elt (arg, index); > >> > + tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns); > >> > + tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2); > >> > + > >> > + if (!operand_equal_p (const_binop (MINUS_EXPR, arg_elem2, > >> > arg_elem1), > >> > + const_binop (MINUS_EXPR, arg_elem1, > >> > arg_elem0), > >> > + 0)) > >> > >> This needs to check whether const_binop returns null. Maybe: > >> > >> tree step1, step2; > >> if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0)) > >> || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1)) > >> || !operand_equal_p (step1, step2, 0)) > >> > >> OK with those changes, thanks. > > Hi Richard, > > Thanks for the suggestions, updated the attached patch accordingly. > > Bootstrapped+tested with and without SVE on aarch64-linux-gnu and > > x86_64-linux-gnu. > > OK to commit ? > > Yes, thanks. Thanks, committed to trunk in 3ec8ecb8e92faec889bc6f7aeac9ff59e82b4f7f.
Thanks, Prathamesh > > Richard > > > > > Thanks, > > Prathamesh > >> > >> Richard > >> > >> > + { > >> > + if (reason) > >> > + *reason = "not a natural stepped sequence"; > >> > + return false; > >> > + } > >> > + } > >> > } > >> > > >> > return true; > >> > @@ -17161,7 +17183,8 @@ namespace test_fold_vec_perm_cst { > >> > static tree > >> > build_vec_cst_rand (machine_mode vmode, unsigned npatterns, > >> > unsigned nelts_per_pattern, > >> > - int step = 0, int threshold = 100) > >> > + int step = 0, bool natural_stepped = false, > >> > + int threshold = 100) > >> > { > >> > tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER > >> > (vmode), 1); > >> > tree vectype = build_vector_type_for_mode (inner_type, vmode); > >> > @@ -17176,17 +17199,28 @@ build_vec_cst_rand (machine_mode vmode, > >> > unsigned npatterns, > >> > > >> > // Fill a1 for each pattern > >> > for (unsigned i = 0; i < npatterns; i++) > >> > - builder.quick_push (build_int_cst (inner_type, rand () % > >> > threshold)); > >> > - > >> > + { > >> > + tree a1; > >> > + if (natural_stepped) > >> > + { > >> > + tree a0 = builder[i]; > >> > + wide_int a0_val = wi::to_wide (a0); > >> > + wide_int a1_val = a0_val + step; > >> > + a1 = wide_int_to_tree (inner_type, a1_val); > >> > + } > >> > + else > >> > + a1 = build_int_cst (inner_type, rand () % threshold); > >> > + builder.quick_push (a1); > >> > + } > >> > if (nelts_per_pattern == 2) > >> > return builder.build (); > >> > > >> > for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; > >> > i++) > >> > { > >> > tree prev_elem = builder[i - npatterns]; > >> > - int prev_elem_val = TREE_INT_CST_LOW (prev_elem); > >> > - int val = prev_elem_val + step; > >> > - builder.quick_push (build_int_cst (inner_type, val)); > >> > + wide_int prev_elem_val = wi::to_wide (prev_elem); > >> > + wide_int val = prev_elem_val + step; > >> > + builder.quick_push (wide_int_to_tree (inner_type, val)); > >> > } > >> > > >> > return builder.build (); > >> > @@ -17432,7 +17466,7 @@ test_nunits_min_2 (machine_mode vmode) > >> > and step (a2 - a1) = 1, step is not a multiple of npatterns > >> > in input vector. So return NULL_TREE. */ > >> > { > >> > - tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); > >> > + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true); > >> > tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); > >> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > >> > > >> > @@ -17452,7 +17486,7 @@ test_nunits_min_2 (machine_mode vmode) > >> > Test that stepped sequence of the pattern selects from arg0. > >> > res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3) */ > >> > { > >> > - tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1); > >> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true); > >> > tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1); > >> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > >> > > >> > @@ -17466,6 +17500,62 @@ test_nunits_min_2 (machine_mode vmode) > >> > tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) }; > >> > validate_res (1, 3, res, expected_res); > >> > } > >> > + > >> > + /* Case 6: PR111648 - a1 chooses base element from input vector > >> > arg. > >> > + In this case ensure that arg has a natural stepped sequence > >> > + to preserve arg's encoding. > >> > + > >> > + As a concrete example, consider: > >> > + arg0: { -16, -9, -10, ... } // (1, 3) > >> > + arg1: { -12, -5, -6, ... } // (1, 3) > >> > + sel = { 0, len, len + 1, ... } // (1, 3) > >> > + > >> > + This will create res with following encoding: > >> > + res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3) > >> > + = { -16, -12, -5, ... } > >> > + > >> > + The step in above encoding would be: (-5) - (-12) = 7 > >> > + And hence res[3] would be computed as -5 + 7 = 2. > >> > + instead of arg1[2], ie, -6. > >> > + Ensure that valid_mask_for_fold_vec_perm_cst returns false > >> > + for this case. */ > >> > + { > >> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1); > >> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1); > >> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > >> > + > >> > + vec_perm_builder builder (len, 1, 3); > >> > + poly_uint64 mask_elems[] = { 0, len, len+1 }; > >> > + builder_push_elems (builder, mask_elems); > >> > + > >> > + vec_perm_indices sel (builder, 2, len); > >> > + const char *reason; > >> > + /* FIXME: It may happen that build_vec_cst_rand may build a natural > >> > + stepped pattern, even if we didn't explicitly tell it to. So > >> > folding > >> > + may not always fail, but if it does, ensure that's because arg1 > >> > does > >> > + not have a natural stepped sequence (and not due to other > >> > reason) */ > >> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, > >> > &reason); > >> > + if (res == NULL_TREE) > >> > + ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence")); > >> > + } > >> > + > >> > + /* Case 7: Same as Case 6, except that arg1 contains natural > >> > stepped > >> > + sequence and thus folding should be valid for this case. */ > >> > + { > >> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1); > >> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true); > >> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > >> > + > >> > + vec_perm_builder builder (len, 1, 3); > >> > + poly_uint64 mask_elems[] = { 0, len, len+1 }; > >> > + builder_push_elems (builder, mask_elems); > >> > + > >> > + vec_perm_indices sel (builder, 2, len); > >> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > >> > + > >> > + tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) }; > >> > + validate_res (1, 3, res, expected_res); > >> > + } > >> > } > >> > } > >> > > >> > diff --git a/gcc/testsuite/gcc.dg/vect/pr111648.c > >> > b/gcc/testsuite/gcc.dg/vect/pr111648.c > >> > new file mode 100644 > >> > index 00000000000..093e2b02654 > >> > --- /dev/null > >> > +++ b/gcc/testsuite/gcc.dg/vect/pr111648.c > >> > @@ -0,0 +1,23 @@ > >> > +/* { dg-do compile } */ > >> > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > >> > + > >> > +int a; > >> > +int *b = &a; > >> > +static int **c = &b; > >> > +static int d; > >> > +short e; > >> > +short f; > >> > + > >> > +_Bool foo () > >> > +{ > >> > + f = -21; > >> > + for (; f < -5; f++) { > >> > + e = f ^ 3; > >> > + d = *b; > >> > + **c = e; > >> > + } > >> > + > >> > + return d == -6; > >> > +} > >> > + > >> > +/* { dg-final { scan-tree-dump "return 1" "optimized" } } */ > > > > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc > > index 44118e799eb..40767736389 100644 > > --- a/gcc/fold-const.cc > > +++ b/gcc/fold-const.cc > > @@ -10692,9 +10692,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree > > arg1, > > > > /* Ensure that the stepped sequence always selects from the same > > input pattern. */ > > - unsigned arg_npatterns > > - = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0) > > - : VECTOR_CST_NPATTERNS (arg1); > > + tree arg = ((q1 & 1) == 0) ? arg0 : arg1; > > + unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg); > > > > if (!multiple_p (step, arg_npatterns)) > > { > > @@ -10702,6 +10701,31 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, > > tree arg1, > > *reason = "step is not multiple of npatterns"; > > return false; > > } > > + > > + /* If a1 chooses base element from arg, ensure that it's a natural > > + stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0]) > > + to preserve arg's encoding. */ > > + > > + if (maybe_lt (r1, arg_npatterns)) > > + { > > + unsigned HOST_WIDE_INT index; > > + if (!r1.is_constant (&index)) > > + return false; > > + > > + tree arg_elem0 = vector_cst_elt (arg, index); > > + tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns); > > + tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2); > > + > > + tree step1, step2; > > + if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0)) > > + || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1)) > > + || !operand_equal_p (step1, step2, 0)) > > + { > > + if (reason) > > + *reason = "not a natural stepped sequence"; > > + return false; > > + } > > + } > > } > > > > return true; > > @@ -17165,7 +17189,8 @@ namespace test_fold_vec_perm_cst { > > static tree > > build_vec_cst_rand (machine_mode vmode, unsigned npatterns, > > unsigned nelts_per_pattern, > > - int step = 0, int threshold = 100) > > + int step = 0, bool natural_stepped = false, > > + int threshold = 100) > > { > > tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER > > (vmode), 1); > > tree vectype = build_vector_type_for_mode (inner_type, vmode); > > @@ -17180,17 +17205,28 @@ build_vec_cst_rand (machine_mode vmode, unsigned > > npatterns, > > > > // Fill a1 for each pattern > > for (unsigned i = 0; i < npatterns; i++) > > - builder.quick_push (build_int_cst (inner_type, rand () % threshold)); > > - > > + { > > + tree a1; > > + if (natural_stepped) > > + { > > + tree a0 = builder[i]; > > + wide_int a0_val = wi::to_wide (a0); > > + wide_int a1_val = a0_val + step; > > + a1 = wide_int_to_tree (inner_type, a1_val); > > + } > > + else > > + a1 = build_int_cst (inner_type, rand () % threshold); > > + builder.quick_push (a1); > > + } > > if (nelts_per_pattern == 2) > > return builder.build (); > > > > for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++) > > { > > tree prev_elem = builder[i - npatterns]; > > - int prev_elem_val = TREE_INT_CST_LOW (prev_elem); > > - int val = prev_elem_val + step; > > - builder.quick_push (build_int_cst (inner_type, val)); > > + wide_int prev_elem_val = wi::to_wide (prev_elem); > > + wide_int val = prev_elem_val + step; > > + builder.quick_push (wide_int_to_tree (inner_type, val)); > > } > > > > return builder.build (); > > @@ -17436,7 +17472,7 @@ test_nunits_min_2 (machine_mode vmode) > > and step (a2 - a1) = 1, step is not a multiple of npatterns > > in input vector. So return NULL_TREE. */ > > { > > - tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); > > + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true); > > tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); > > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > > > @@ -17456,7 +17492,7 @@ test_nunits_min_2 (machine_mode vmode) > > Test that stepped sequence of the pattern selects from arg0. > > res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3) */ > > { > > - tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1); > > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true); > > tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1); > > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > > > @@ -17470,6 +17506,62 @@ test_nunits_min_2 (machine_mode vmode) > > tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) }; > > validate_res (1, 3, res, expected_res); > > } > > + > > + /* Case 6: PR111648 - a1 chooses base element from input vector arg. > > + In this case ensure that arg has a natural stepped sequence > > + to preserve arg's encoding. > > + > > + As a concrete example, consider: > > + arg0: { -16, -9, -10, ... } // (1, 3) > > + arg1: { -12, -5, -6, ... } // (1, 3) > > + sel = { 0, len, len + 1, ... } // (1, 3) > > + > > + This will create res with following encoding: > > + res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3) > > + = { -16, -12, -5, ... } > > + > > + The step in above encoding would be: (-5) - (-12) = 7 > > + And hence res[3] would be computed as -5 + 7 = 2. > > + instead of arg1[2], ie, -6. > > + Ensure that valid_mask_for_fold_vec_perm_cst returns false > > + for this case. */ > > + { > > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1); > > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 1, 3); > > + poly_uint64 mask_elems[] = { 0, len, len+1 }; > > + builder_push_elems (builder, mask_elems); > > + > > + vec_perm_indices sel (builder, 2, len); > > + const char *reason; > > + /* FIXME: It may happen that build_vec_cst_rand may build a natural > > + stepped pattern, even if we didn't explicitly tell it to. So > > folding > > + may not always fail, but if it does, ensure that's because arg1 > > does > > + not have a natural stepped sequence (and not due to other reason) > > */ > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, > > &reason); > > + if (res == NULL_TREE) > > + ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence")); > > + } > > + > > + /* Case 7: Same as Case 6, except that arg1 contains natural stepped > > + sequence and thus folding should be valid for this case. */ > > + { > > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1); > > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 1, 3); > > + poly_uint64 mask_elems[] = { 0, len, len+1 }; > > + builder_push_elems (builder, mask_elems); > > + > > + vec_perm_indices sel (builder, 2, len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + > > + tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) }; > > + validate_res (1, 3, res, expected_res); > > + } > > } > > } > >