Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Mon, 24 Apr 2023 at 15:02, Richard Sandiford wrote: > > Prathamesh Kulkarni writes: > > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (is_combined_permutation_identity): Try to > > simplify two successive VEC_PERM_EXPRs with single operand and same > > mask, where mask chooses elements in reverse order. > > > > gcc/testesuite/ChangeLog: > > * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > > new file mode 100644 > > index 000..e57ee67d716 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > > @@ -0,0 +1,12 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > + > > +#include > > + > > +svint32_t f(svint32_t v) > > +{ > > + return svrev_s32 (svrev_s32 (v)); > > +} > > + > > +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 9b567440ba4..61df7efe82c 100644 > > --- a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -2541,6 +2541,27 @@ is_combined_permutation_identity (tree mask1, tree > > mask2) > > > >gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST > > && TREE_CODE (mask2) == VECTOR_CST); > > + > > + /* For VLA masks, check for the following pattern: > > + v1 = VEC_PERM_EXPR (v0, v0, mask1) > > + v2 = VEC_PERM_EXPR (v1, v1, mask2) > > Maybe blank out the second operands using "...": > > v1 = VEC_PERM_EXPR (v0, ..., mask1) > v2 = VEC_PERM_EXPR (v1, ..., mask2) > > to make it clear that they don't matter. > > OK with that change, thanks. Thanks, committed in: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f0eabc52c9a2d3da0bfc201da7a5c1658b76e9a4 Thanks, Prathamesh > > Richard > > > + --> > > + v2 = v0 > > + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ > > + > > + if (operand_equal_p (mask1, mask2, 0) > > + && !VECTOR_CST_NELTS (mask1).is_constant ()) > > +{ > > + vec_perm_builder builder; > > + if (tree_to_vec_perm_builder (&builder, mask1)) > > + { > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > > + vec_perm_indices sel (builder, 1, nelts); > > + if (sel.series_p (0, 1, nelts - 1, -1)) > > + return 1; > > + } > > +} > > + > >mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, > > mask2); > >if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) > > return 0;
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
Prathamesh Kulkarni writes: > gcc/ChangeLog: > * tree-ssa-forwprop.cc (is_combined_permutation_identity): Try to > simplify two successive VEC_PERM_EXPRs with single operand and same > mask, where mask chooses elements in reverse order. > > gcc/testesuite/ChangeLog: > * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > new file mode 100644 > index 000..e57ee67d716 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > + > +#include > + > +svint32_t f(svint32_t v) > +{ > + return svrev_s32 (svrev_s32 (v)); > +} > + > +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 9b567440ba4..61df7efe82c 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -2541,6 +2541,27 @@ is_combined_permutation_identity (tree mask1, tree > mask2) > >gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST > && TREE_CODE (mask2) == VECTOR_CST); > + > + /* For VLA masks, check for the following pattern: > + v1 = VEC_PERM_EXPR (v0, v0, mask1) > + v2 = VEC_PERM_EXPR (v1, v1, mask2) Maybe blank out the second operands using "...": v1 = VEC_PERM_EXPR (v0, ..., mask1) v2 = VEC_PERM_EXPR (v1, ..., mask2) to make it clear that they don't matter. OK with that change, thanks. Richard > + --> > + v2 = v0 > + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ > + > + if (operand_equal_p (mask1, mask2, 0) > + && !VECTOR_CST_NELTS (mask1).is_constant ()) > +{ > + vec_perm_builder builder; > + if (tree_to_vec_perm_builder (&builder, mask1)) > + { > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > + vec_perm_indices sel (builder, 1, nelts); > + if (sel.series_p (0, 1, nelts - 1, -1)) > + return 1; > + } > +} > + >mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, > mask2); >if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) > return 0;
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Fri, 21 Apr 2023 at 21:57, Richard Sandiford wrote: > > Prathamesh Kulkarni writes: > > On Wed, 19 Apr 2023 at 16:17, Richard Biener > > wrote: > >> > >> On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni > >> wrote: > >> > > >> > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > >> > wrote: > >> > > > >> > > On Tue, 11 Apr 2023 at 14:17, Richard Biener > >> > > wrote: > >> > > > > >> > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > >> > > > wrote: > >> > > > > > >> > > > > Hi, > >> > > > > For the following test: > >> > > > > > >> > > > > svint32_t f(svint32_t v) > >> > > > > { > >> > > > > return svrev_s32 (svrev_s32 (v)); > >> > > > > } > >> > > > > > >> > > > > We generate 2 rev instructions instead of nop: > >> > > > > f: > >> > > > > rev z0.s, z0.s > >> > > > > rev z0.s, z0.s > >> > > > > ret > >> > > > > > >> > > > > The attached patch tries to fix that by trying to recognize the > >> > > > > following > >> > > > > pattern in match.pd: > >> > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > >> > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > >> > > > > --> > >> > > > > v2 = v0 > >> > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > >> > > > > > >> > > > > Code-gen with patch: > >> > > > > f: > >> > > > > ret > >> > > > > > >> > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in > >> > > > > progress. > >> > > > > Does it look OK for stage-1 ? > >> > > > > >> > > > I didn't look at the patch but > >> > > > tree-ssa-forwprop.cc:simplify_permutation should > >> > > > handle two consecutive permutes with the > >> > > > is_combined_permutation_identity > >> > > > which might need tweaking for VLA vectors > >> > > Hi Richard, > >> > > Thanks for the suggestions. The attached patch modifies > >> > > is_combined_permutation_identity > >> > > to recognize the above pattern. > >> > > Does it look OK ? > >> > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > >> > Hi, > >> > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html > >> > >> Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 > >> and amend the function comment accordingly, say, > >> > >> tem = VEC_PERM ; > >> res = VEC_PERM ; > >> > >> SAME_P specifies whether op0 and op1 compare equal. */ > >> > >> + if (def_stmt) > >> +gcc_checking_assert (is_gimple_assign (def_stmt) > >> +&& gimple_assign_rhs_code (def_stmt) == > >> VEC_PERM_EXPR); > >> this is then unnecessary > >> > >>mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, > >> mask2); > >> + > >> + /* For VLA masks, check for the following pattern: > >> + v1 = VEC_PERM_EXPR (v0, v0, mask) > >> + v2 = VEC_PERM_EXPR (v1, v1, mask) > >> + --> > >> + v2 = v0 > >> > >> you are not using 'mask' so please defer fold_ternary until after your > >> special-case. > >> > >> + if (operand_equal_p (mask1, mask2, 0) > >> + && !VECTOR_CST_NELTS (mask1).is_constant () > >> + && def_stmt > >> + && operand_equal_p (gimple_assign_rhs1 (def_stmt), > >> + gimple_assign_rhs2 (def_stmt), 0)) > >> +{ > >> + vec_perm_builder builder; > >> + if (tree_to_vec_perm_builder (&builder, mask1)) > >> + { > >> + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > >> + vec_perm_indices sel (builder, 1, nelts); > >> + if (sel.series_p (0, 1, nelts - 1, -1)) > >> + return 1; > >> + } > >> + return 0; > >> > >> I'm defering to Richard whether this is the correct way to check for a > >> vector > >> reversing mask (I wonder how constructing such mask is even possible) > > Hi Richard, > > Thanks for the suggestions, I have updated the patch accordingly. > > > > The following hunk from svrev_impl::fold() constructs mask in reverse: > > /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ > > poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); > > vec_perm_builder builder (nelts, 1, 3); > > for (int i = 0; i < 3; ++i) > > builder.quick_push (nelts - i - 1); > > return fold_permute (f, builder); > > > > To see if mask chooses elements in reverse, I borrowed it from function > > comment > > for series_p in vec-perm-indices.cc: > > /* Return true if index OUT_BASE + I * OUT_STEP selects input > >element IN_BASE + I * IN_STEP. For example, the call to test > >whether a permute reverses a vector of N elements would be: > > > > series_p (0, 1, N - 1, -1) > > > >which would return true for { N - 1, N - 2, N - 3, ... }. */ > > > > Thanks, > > Prathamesh > >> > >> Richard. > >> > >> > Thanks, > >> > Prathamesh > >> > > > >> > > Thanks, > >> > > Prathamesh > >> > > > > >> > > > Richard. > >> > > > > >> > > > > > >> > > > > Thanks, > >> > > > > Prathamesh > > > > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (is_combined_permutation
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
Prathamesh Kulkarni writes: > On Wed, 19 Apr 2023 at 16:17, Richard Biener > wrote: >> >> On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni >> wrote: >> > >> > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni >> > wrote: >> > > >> > > On Tue, 11 Apr 2023 at 14:17, Richard Biener >> > > wrote: >> > > > >> > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches >> > > > wrote: >> > > > > >> > > > > Hi, >> > > > > For the following test: >> > > > > >> > > > > svint32_t f(svint32_t v) >> > > > > { >> > > > > return svrev_s32 (svrev_s32 (v)); >> > > > > } >> > > > > >> > > > > We generate 2 rev instructions instead of nop: >> > > > > f: >> > > > > rev z0.s, z0.s >> > > > > rev z0.s, z0.s >> > > > > ret >> > > > > >> > > > > The attached patch tries to fix that by trying to recognize the >> > > > > following >> > > > > pattern in match.pd: >> > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) >> > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) >> > > > > --> >> > > > > v2 = v0 >> > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } >> > > > > >> > > > > Code-gen with patch: >> > > > > f: >> > > > > ret >> > > > > >> > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in >> > > > > progress. >> > > > > Does it look OK for stage-1 ? >> > > > >> > > > I didn't look at the patch but >> > > > tree-ssa-forwprop.cc:simplify_permutation should >> > > > handle two consecutive permutes with the >> > > > is_combined_permutation_identity >> > > > which might need tweaking for VLA vectors >> > > Hi Richard, >> > > Thanks for the suggestions. The attached patch modifies >> > > is_combined_permutation_identity >> > > to recognize the above pattern. >> > > Does it look OK ? >> > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. >> > Hi, >> > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html >> >> Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 >> and amend the function comment accordingly, say, >> >> tem = VEC_PERM ; >> res = VEC_PERM ; >> >> SAME_P specifies whether op0 and op1 compare equal. */ >> >> + if (def_stmt) >> +gcc_checking_assert (is_gimple_assign (def_stmt) >> +&& gimple_assign_rhs_code (def_stmt) == >> VEC_PERM_EXPR); >> this is then unnecessary >> >>mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, >> mask2); >> + >> + /* For VLA masks, check for the following pattern: >> + v1 = VEC_PERM_EXPR (v0, v0, mask) >> + v2 = VEC_PERM_EXPR (v1, v1, mask) >> + --> >> + v2 = v0 >> >> you are not using 'mask' so please defer fold_ternary until after your >> special-case. >> >> + if (operand_equal_p (mask1, mask2, 0) >> + && !VECTOR_CST_NELTS (mask1).is_constant () >> + && def_stmt >> + && operand_equal_p (gimple_assign_rhs1 (def_stmt), >> + gimple_assign_rhs2 (def_stmt), 0)) >> +{ >> + vec_perm_builder builder; >> + if (tree_to_vec_perm_builder (&builder, mask1)) >> + { >> + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); >> + vec_perm_indices sel (builder, 1, nelts); >> + if (sel.series_p (0, 1, nelts - 1, -1)) >> + return 1; >> + } >> + return 0; >> >> I'm defering to Richard whether this is the correct way to check for a vector >> reversing mask (I wonder how constructing such mask is even possible) > Hi Richard, > Thanks for the suggestions, I have updated the patch accordingly. > > The following hunk from svrev_impl::fold() constructs mask in reverse: > /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ > poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); > vec_perm_builder builder (nelts, 1, 3); > for (int i = 0; i < 3; ++i) > builder.quick_push (nelts - i - 1); > return fold_permute (f, builder); > > To see if mask chooses elements in reverse, I borrowed it from function > comment > for series_p in vec-perm-indices.cc: > /* Return true if index OUT_BASE + I * OUT_STEP selects input >element IN_BASE + I * IN_STEP. For example, the call to test >whether a permute reverses a vector of N elements would be: > > series_p (0, 1, N - 1, -1) > >which would return true for { N - 1, N - 2, N - 3, ... }. */ > > Thanks, > Prathamesh >> >> Richard. >> >> > Thanks, >> > Prathamesh >> > > >> > > Thanks, >> > > Prathamesh >> > > > >> > > > Richard. >> > > > >> > > > > >> > > > > Thanks, >> > > > > Prathamesh > > gcc/ChangeLog: > * tree-ssa-forwprop.cc (is_combined_permutation_identity): > New parameter same_p. > Try to simplify two successive VEC_PERM_EXPRs with single operand > and same mask, where mask chooses elements in reverse order. > > gcc/testesuite/ChangeLog: > * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Wed, Apr 19, 2023 at 2:20 PM Prathamesh Kulkarni wrote: > > On Wed, 19 Apr 2023 at 16:17, Richard Biener > wrote: > > > > On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni > > wrote: > > > > > > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > > > wrote: > > > > > > > > On Tue, 11 Apr 2023 at 14:17, Richard Biener > > > > wrote: > > > > > > > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > > > > wrote: > > > > > > > > > > > > Hi, > > > > > > For the following test: > > > > > > > > > > > > svint32_t f(svint32_t v) > > > > > > { > > > > > > return svrev_s32 (svrev_s32 (v)); > > > > > > } > > > > > > > > > > > > We generate 2 rev instructions instead of nop: > > > > > > f: > > > > > > rev z0.s, z0.s > > > > > > rev z0.s, z0.s > > > > > > ret > > > > > > > > > > > > The attached patch tries to fix that by trying to recognize the > > > > > > following > > > > > > pattern in match.pd: > > > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > > > > --> > > > > > > v2 = v0 > > > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > > > > > > > Code-gen with patch: > > > > > > f: > > > > > > ret > > > > > > > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in > > > > > > progress. > > > > > > Does it look OK for stage-1 ? > > > > > > > > > > I didn't look at the patch but > > > > > tree-ssa-forwprop.cc:simplify_permutation should > > > > > handle two consecutive permutes with the > > > > > is_combined_permutation_identity > > > > > which might need tweaking for VLA vectors > > > > Hi Richard, > > > > Thanks for the suggestions. The attached patch modifies > > > > is_combined_permutation_identity > > > > to recognize the above pattern. > > > > Does it look OK ? > > > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > > > Hi, > > > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html > > > > Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 > > and amend the function comment accordingly, say, > > > > tem = VEC_PERM ; > > res = VEC_PERM ; > > > > SAME_P specifies whether op0 and op1 compare equal. */ > > > > + if (def_stmt) > > +gcc_checking_assert (is_gimple_assign (def_stmt) > > +&& gimple_assign_rhs_code (def_stmt) == > > VEC_PERM_EXPR); > > this is then unnecessary > > > >mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, > > mask2); > > + > > + /* For VLA masks, check for the following pattern: > > + v1 = VEC_PERM_EXPR (v0, v0, mask) > > + v2 = VEC_PERM_EXPR (v1, v1, mask) > > + --> > > + v2 = v0 > > > > you are not using 'mask' so please defer fold_ternary until after your > > special-case. > > > > + if (operand_equal_p (mask1, mask2, 0) > > + && !VECTOR_CST_NELTS (mask1).is_constant () > > + && def_stmt > > + && operand_equal_p (gimple_assign_rhs1 (def_stmt), > > + gimple_assign_rhs2 (def_stmt), 0)) > > +{ > > + vec_perm_builder builder; > > + if (tree_to_vec_perm_builder (&builder, mask1)) > > + { > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > > + vec_perm_indices sel (builder, 1, nelts); > > + if (sel.series_p (0, 1, nelts - 1, -1)) > > + return 1; > > + } > > + return 0; > > > > I'm defering to Richard whether this is the correct way to check for a > > vector > > reversing mask (I wonder how constructing such mask is even possible) > Hi Richard, > Thanks for the suggestions, I have updated the patch accordingly. > > The following hunk from svrev_impl::fold() constructs mask in reverse: > /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ > poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); > vec_perm_builder builder (nelts, 1, 3); > for (int i = 0; i < 3; ++i) > builder.quick_push (nelts - i - 1); > return fold_permute (f, builder); > > To see if mask chooses elements in reverse, I borrowed it from function > comment > for series_p in vec-perm-indices.cc: > /* Return true if index OUT_BASE + I * OUT_STEP selects input >element IN_BASE + I * IN_STEP. For example, the call to test >whether a permute reverses a vector of N elements would be: > > series_p (0, 1, N - 1, -1) > >which would return true for { N - 1, N - 2, N - 3, ... }. */ Looks good from my side now, but as said I defer to Richard for the check. Richard. > Thanks, > Prathamesh > > > > Richard. > > > > > Thanks, > > > Prathamesh > > > > > > > > Thanks, > > > > Prathamesh > > > > > > > > > > Richard. > > > > > > > > > > > > > > > > > Thanks, > > > > > > Prathamesh
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Wed, 19 Apr 2023 at 16:17, Richard Biener wrote: > > On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni > wrote: > > > > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > > wrote: > > > > > > On Tue, 11 Apr 2023 at 14:17, Richard Biener > > > wrote: > > > > > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > > > wrote: > > > > > > > > > > Hi, > > > > > For the following test: > > > > > > > > > > svint32_t f(svint32_t v) > > > > > { > > > > > return svrev_s32 (svrev_s32 (v)); > > > > > } > > > > > > > > > > We generate 2 rev instructions instead of nop: > > > > > f: > > > > > rev z0.s, z0.s > > > > > rev z0.s, z0.s > > > > > ret > > > > > > > > > > The attached patch tries to fix that by trying to recognize the > > > > > following > > > > > pattern in match.pd: > > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > > > --> > > > > > v2 = v0 > > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > > > > > Code-gen with patch: > > > > > f: > > > > > ret > > > > > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in > > > > > progress. > > > > > Does it look OK for stage-1 ? > > > > > > > > I didn't look at the patch but > > > > tree-ssa-forwprop.cc:simplify_permutation should > > > > handle two consecutive permutes with the > > > > is_combined_permutation_identity > > > > which might need tweaking for VLA vectors > > > Hi Richard, > > > Thanks for the suggestions. The attached patch modifies > > > is_combined_permutation_identity > > > to recognize the above pattern. > > > Does it look OK ? > > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > > Hi, > > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html > > Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 > and amend the function comment accordingly, say, > > tem = VEC_PERM ; > res = VEC_PERM ; > > SAME_P specifies whether op0 and op1 compare equal. */ > > + if (def_stmt) > +gcc_checking_assert (is_gimple_assign (def_stmt) > +&& gimple_assign_rhs_code (def_stmt) == > VEC_PERM_EXPR); > this is then unnecessary > >mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, > mask2); > + > + /* For VLA masks, check for the following pattern: > + v1 = VEC_PERM_EXPR (v0, v0, mask) > + v2 = VEC_PERM_EXPR (v1, v1, mask) > + --> > + v2 = v0 > > you are not using 'mask' so please defer fold_ternary until after your > special-case. > > + if (operand_equal_p (mask1, mask2, 0) > + && !VECTOR_CST_NELTS (mask1).is_constant () > + && def_stmt > + && operand_equal_p (gimple_assign_rhs1 (def_stmt), > + gimple_assign_rhs2 (def_stmt), 0)) > +{ > + vec_perm_builder builder; > + if (tree_to_vec_perm_builder (&builder, mask1)) > + { > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > + vec_perm_indices sel (builder, 1, nelts); > + if (sel.series_p (0, 1, nelts - 1, -1)) > + return 1; > + } > + return 0; > > I'm defering to Richard whether this is the correct way to check for a vector > reversing mask (I wonder how constructing such mask is even possible) Hi Richard, Thanks for the suggestions, I have updated the patch accordingly. The following hunk from svrev_impl::fold() constructs mask in reverse: /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); vec_perm_builder builder (nelts, 1, 3); for (int i = 0; i < 3; ++i) builder.quick_push (nelts - i - 1); return fold_permute (f, builder); To see if mask chooses elements in reverse, I borrowed it from function comment for series_p in vec-perm-indices.cc: /* Return true if index OUT_BASE + I * OUT_STEP selects input element IN_BASE + I * IN_STEP. For example, the call to test whether a permute reverses a vector of N elements would be: series_p (0, 1, N - 1, -1) which would return true for { N - 1, N - 2, N - 3, ... }. */ Thanks, Prathamesh > > Richard. > > > Thanks, > > Prathamesh > > > > > > Thanks, > > > Prathamesh > > > > > > > > Richard. > > > > > > > > > > > > > > Thanks, > > > > > Prathamesh gcc/ChangeLog: * tree-ssa-forwprop.cc (is_combined_permutation_identity): New parameter same_p. Try to simplify two successive VEC_PERM_EXPRs with single operand and same mask, where mask chooses elements in reverse order. gcc/testesuite/ChangeLog: * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c new file mode 100644 index 000..e57ee67d716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c @@ -0,0 +1,12
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni wrote: > > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > wrote: > > > > On Tue, 11 Apr 2023 at 14:17, Richard Biener > > wrote: > > > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > > wrote: > > > > > > > > Hi, > > > > For the following test: > > > > > > > > svint32_t f(svint32_t v) > > > > { > > > > return svrev_s32 (svrev_s32 (v)); > > > > } > > > > > > > > We generate 2 rev instructions instead of nop: > > > > f: > > > > rev z0.s, z0.s > > > > rev z0.s, z0.s > > > > ret > > > > > > > > The attached patch tries to fix that by trying to recognize the > > > > following > > > > pattern in match.pd: > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > > --> > > > > v2 = v0 > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > > > Code-gen with patch: > > > > f: > > > > ret > > > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in > > > > progress. > > > > Does it look OK for stage-1 ? > > > > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation > > > should > > > handle two consecutive permutes with the is_combined_permutation_identity > > > which might need tweaking for VLA vectors > > Hi Richard, > > Thanks for the suggestions. The attached patch modifies > > is_combined_permutation_identity > > to recognize the above pattern. > > Does it look OK ? > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > Hi, > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 and amend the function comment accordingly, say, tem = VEC_PERM ; res = VEC_PERM ; SAME_P specifies whether op0 and op1 compare equal. */ + if (def_stmt) +gcc_checking_assert (is_gimple_assign (def_stmt) +&& gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); this is then unnecessary mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); + + /* For VLA masks, check for the following pattern: + v1 = VEC_PERM_EXPR (v0, v0, mask) + v2 = VEC_PERM_EXPR (v1, v1, mask) + --> + v2 = v0 you are not using 'mask' so please defer fold_ternary until after your special-case. + if (operand_equal_p (mask1, mask2, 0) + && !VECTOR_CST_NELTS (mask1).is_constant () + && def_stmt + && operand_equal_p (gimple_assign_rhs1 (def_stmt), + gimple_assign_rhs2 (def_stmt), 0)) +{ + vec_perm_builder builder; + if (tree_to_vec_perm_builder (&builder, mask1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + return 1; + } + return 0; I'm defering to Richard whether this is the correct way to check for a vector reversing mask (I wonder how constructing such mask is even possible) Richard. > Thanks, > Prathamesh > > > > Thanks, > > Prathamesh > > > > > > Richard. > > > > > > > > > > > Thanks, > > > > Prathamesh
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni wrote: > > On Tue, 11 Apr 2023 at 14:17, Richard Biener > wrote: > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > wrote: > > > > > > Hi, > > > For the following test: > > > > > > svint32_t f(svint32_t v) > > > { > > > return svrev_s32 (svrev_s32 (v)); > > > } > > > > > > We generate 2 rev instructions instead of nop: > > > f: > > > rev z0.s, z0.s > > > rev z0.s, z0.s > > > ret > > > > > > The attached patch tries to fix that by trying to recognize the following > > > pattern in match.pd: > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > --> > > > v2 = v0 > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > Code-gen with patch: > > > f: > > > ret > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > > > Does it look OK for stage-1 ? > > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation > > should > > handle two consecutive permutes with the is_combined_permutation_identity > > which might need tweaking for VLA vectors > Hi Richard, > Thanks for the suggestions. The attached patch modifies > is_combined_permutation_identity > to recognize the above pattern. > Does it look OK ? > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. Hi, ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html Thanks, Prathamesh > > Thanks, > Prathamesh > > > > Richard. > > > > > > > > Thanks, > > > Prathamesh
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Tue, 11 Apr 2023 at 14:17, Richard Biener wrote: > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > wrote: > > > > Hi, > > For the following test: > > > > svint32_t f(svint32_t v) > > { > > return svrev_s32 (svrev_s32 (v)); > > } > > > > We generate 2 rev instructions instead of nop: > > f: > > rev z0.s, z0.s > > rev z0.s, z0.s > > ret > > > > The attached patch tries to fix that by trying to recognize the following > > pattern in match.pd: > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > --> > > v2 = v0 > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > Code-gen with patch: > > f: > > ret > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > > Does it look OK for stage-1 ? > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation > should > handle two consecutive permutes with the is_combined_permutation_identity > which might need tweaking for VLA vectors Hi Richard, Thanks for the suggestions. The attached patch modifies is_combined_permutation_identity to recognize the above pattern. Does it look OK ? Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. Thanks, Prathamesh > > Richard. > > > > > Thanks, > > Prathamesh gcc/ChangeLog: * tree-ssa-forwprop.cc (is_combined_permutation_identity): New parameter def_stmt. Try to simplify two successive VEC_PERM_EXPRs with single operand and same mask, where mask chooses elements in reverse order. gcc/testesuite/ChangeLog: * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c new file mode 100644 index 000..e57ee67d716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +#include + +svint32_t f(svint32_t v) +{ + return svrev_s32 (svrev_s32 (v)); +} + +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 9b567440ba4..5cbee077d89 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2532,7 +2532,7 @@ simplify_bitfield_ref (gimple_stmt_iterator *gsi) gives back one of the input. */ static int -is_combined_permutation_identity (tree mask1, tree mask2) +is_combined_permutation_identity (tree mask1, tree mask2, gimple *def_stmt = NULL) { tree mask; unsigned HOST_WIDE_INT nelts, i, j; @@ -2541,7 +2541,36 @@ is_combined_permutation_identity (tree mask1, tree mask2) gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST && TREE_CODE (mask2) == VECTOR_CST); + if (def_stmt) +gcc_checking_assert (is_gimple_assign (def_stmt) +&& gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); + mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); + + /* For VLA masks, check for the following pattern: + v1 = VEC_PERM_EXPR (v0, v0, mask) + v2 = VEC_PERM_EXPR (v1, v1, mask) + --> + v2 = v0 + if mask is {nelts - 1, nelts - 2, ...}. */ + + if (operand_equal_p (mask1, mask2, 0) + && !VECTOR_CST_NELTS (mask1).is_constant () + && def_stmt + && operand_equal_p (gimple_assign_rhs1 (def_stmt), + gimple_assign_rhs2 (def_stmt), 0)) +{ + vec_perm_builder builder; + if (tree_to_vec_perm_builder (&builder, mask1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + return 1; + } + return 0; +} + if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) return 0; @@ -2629,7 +2658,7 @@ simplify_permutation (gimple_stmt_iterator *gsi) op3 = gimple_assign_rhs3 (def_stmt); if (TREE_CODE (op3) != VECTOR_CST) return 0; - ident = is_combined_permutation_identity (op3, op2); + ident = is_combined_permutation_identity (op3, op2, def_stmt); if (!ident) return 0; orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches wrote: > > Hi, > For the following test: > > svint32_t f(svint32_t v) > { > return svrev_s32 (svrev_s32 (v)); > } > > We generate 2 rev instructions instead of nop: > f: > rev z0.s, z0.s > rev z0.s, z0.s > ret > > The attached patch tries to fix that by trying to recognize the following > pattern in match.pd: > v1 = VEC_PERM_EXPR (v0, v0, mask) > v2 = VEC_PERM_EXPR (v1, v1, mask) > --> > v2 = v0 > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > Code-gen with patch: > f: > ret > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > Does it look OK for stage-1 ? I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should handle two consecutive permutes with the is_combined_permutation_identity which might need tweaking for VLA vectors Richard. > > Thanks, > Prathamesh
[match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v
Hi, For the following test: svint32_t f(svint32_t v) { return svrev_s32 (svrev_s32 (v)); } We generate 2 rev instructions instead of nop: f: rev z0.s, z0.s rev z0.s, z0.s ret The attached patch tries to fix that by trying to recognize the following pattern in match.pd: v1 = VEC_PERM_EXPR (v0, v0, mask) v2 = VEC_PERM_EXPR (v1, v1, mask) --> v2 = v0 if mask is { nelts - 1, nelts - 2, nelts - 3, ... } Code-gen with patch: f: ret Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. Does it look OK for stage-1 ? Thanks, Prathamesh gcc/ChangeLog: * match.pd: New pattern to simplify two successive VEC_PERM_EXPRs with single operand and same mask, where mask chooses elements in reverse order. gcc/testesuite/ChangeLog: * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. diff --git a/gcc/match.pd b/gcc/match.pd index b8d3538b809..19dfc8f3722 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8456,3 +8456,27 @@ and, } (if (full_perm_p) (vec_perm (op@3 @0 @1) @3 @2)) + +/* Transform: + v1 = VEC_PERM_EXPR (v0, v0, mask) + v2 = VEC_PERM_EXPR (v1, v1, mask) + --> + v2 = v0 + if mask is {nelts - 1, nelts - 2, ...} */ + +(simplify + (vec_perm (vec_perm@2 @0 @0 VECTOR_CST@1) @2 @1) + (with + { +vec_perm_builder builder; +bool rev_p = false; +if (tree_to_vec_perm_builder (&builder, @1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + rev_p = true; + } + } + (if (rev_p) +@0))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c new file mode 100644 index 000..e57ee67d716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +#include + +svint32_t f(svint32_t v) +{ + return svrev_s32 (svrev_s32 (v)); +} + +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */