> Am 18.12.2024 um 05:00 schrieb Alexandre Oliva <ol...@adacore.com>:
>
> When a loaded field is sign-extended, masked and compared, we used to
> drop from the mask the bits past the original field width, which is
> not correct.
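>
> For a concrete picture (this mirrors the new testcase below; the names
> are just illustrative):
>
>   short a = -1;
>   if ((long long) a & (1LL << 59))  /* the only mask bit is a copy of
>                                        A's sign bit */
>     ...
>
> Clipping the mask to A's 16 bits drops bit 59 altogether, folding the
> test to false even though it holds for any negative A.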
>
> Take note, before clipping the mask, of whether it covered copies of
> the sign bit, and arrange to test the sign bit itself if we're
> comparing with zero. Punt in other cases.
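>
> In effect (a sketch of the intent, not the literal code), when the
> compare is against zero:
>
>   ((long long) a & (1LL << 59)) != 0
>     becomes  (a & 0x8000) != 0  /* test the field's own sign bit */
>
> since every mask bit in the extension area is a copy of bit 15 of A.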
>
> If bits_test fails recoverably, try other ifcombine strategies.
>
> Regstrapped on x86_64-linux-gnu and ppc64-linux-gnu, along with 5 other
> ifcombine patches. Ok to install?
Ok
Richard
> for gcc/ChangeLog
>
> * gimple-fold.cc (decode_field_reference): Add psignbit
> parameter. Set it if the mask references sign-extending
> bits.
> (fold_truth_andor_for_ifcombine): Adjust calls with new
> variables. Swap them along with other r?_* variables. Handle
> extended sign bit compares with zero.
> * tree-ssa-ifcombine.cc (ifcombine_ifandif): If bits_test
> fails in a way that doesn't prevent other ifcombine strategies
> from passing, give them a try.
>
> for gcc/testsuite/ChangeLog
>
> * gcc.dg/field-merge-16.c: New.
> ---
> gcc/gimple-fold.cc | 69 ++++++++++++++++++++++++++++-----
> gcc/testsuite/gcc.dg/field-merge-16.c | 66 ++++++++++++++++++++++++++++++++
> gcc/tree-ssa-ifcombine.cc | 5 +-
> 3 files changed, 128 insertions(+), 12 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/field-merge-16.c
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index 856ee5369a8c9..b84269d6b4179 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -7514,6 +7514,9 @@ gimple_binop_def_p (enum tree_code code, tree t, tree op[2])
> is initially set to a mask with nonzero precision, that mask is
> combined with the found mask, or adjusted in precision to match.
>
> + *PSIGNBIT is set to TRUE if, before clipping to *PBITSIZE, the mask
> + encompassed bits that corresponded to extensions of the sign bit.
> +
> *XOR_P is to be FALSE if EXP might be a XOR used in a compare, in which
> case, if XOR_CMP_OP is a zero constant, it will be overridden with *PEXP,
> *XOR_P will be set to TRUE, and the left-hand operand of the XOR will be
> @@ -7533,7 +7536,8 @@ static tree
> decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
> HOST_WIDE_INT *pbitpos,
> bool *punsignedp, bool *preversep, bool *pvolatilep,
> - wide_int *pand_mask, bool *xor_p, tree *xor_cmp_op,
> + wide_int *pand_mask, bool *psignbit,
> + bool *xor_p, tree *xor_cmp_op,
> gimple **load, location_t loc[4])
> {
> tree exp = *pexp;
> @@ -7545,6 +7549,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
> machine_mode mode;
>
> *load = NULL;
> + *psignbit = false;
>
> /* All the optimizations using this function assume integer fields.
> There are problems with FP fields since the type_for_size call
> @@ -7708,12 +7713,23 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
> if (outer_type && *pbitsize == TYPE_PRECISION (outer_type))
> *punsignedp = TYPE_UNSIGNED (outer_type);
>
> - /* Make the mask the expected width. */
> - if (and_mask.get_precision () != 0)
> - and_mask = wide_int::from (and_mask, *pbitsize, UNSIGNED);
> -
> if (pand_mask)
> - *pand_mask = and_mask;
> + {
> + /* Make the mask the expected width. */
> + if (and_mask.get_precision () != 0)
> + {
> + /* If the AND_MASK encompasses bits that would be extensions of
> + the sign bit, set *PSIGNBIT. */
> + if (!unsignedp
> + && and_mask.get_precision () > *pbitsize
> + && (and_mask
> + & wi::mask (*pbitsize, true, and_mask.get_precision ())) != 0)
> + *psignbit = true;
> + and_mask = wide_int::from (and_mask, *pbitsize, UNSIGNED);
> + }
> +
> + *pand_mask = and_mask;
> + }
>
> return inner;
> }
> @@ -7995,6 +8011,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
> HOST_WIDE_INT rnbitsize, rnbitpos, rnprec;
> bool ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
> bool ll_reversep, lr_reversep, rl_reversep, rr_reversep;
> + bool ll_signbit, lr_signbit, rl_signbit, rr_signbit;
> scalar_int_mode lnmode, lnmode2, rnmode;
> wide_int ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
> wide_int l_const, r_const;
> @@ -8114,19 +8131,19 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
> bool l_xor = false, r_xor = false;
> ll_inner = decode_field_reference (&ll_arg, &ll_bitsize, &ll_bitpos,
> &ll_unsignedp, &ll_reversep, &volatilep,
> - &ll_and_mask, &l_xor, &lr_arg,
> + &ll_and_mask, &ll_signbit, &l_xor, &lr_arg,
> &ll_load, ll_loc);
> lr_inner = decode_field_reference (&lr_arg, &lr_bitsize, &lr_bitpos,
> &lr_unsignedp, &lr_reversep, &volatilep,
> - &lr_and_mask, &l_xor, 0,
> + &lr_and_mask, &lr_signbit, &l_xor, 0,
> &lr_load, lr_loc);
> rl_inner = decode_field_reference (&rl_arg, &rl_bitsize, &rl_bitpos,
> &rl_unsignedp, &rl_reversep, &volatilep,
> - &rl_and_mask, &r_xor, &rr_arg,
> + &rl_and_mask, &rl_signbit, &r_xor, &rr_arg,
> &rl_load, rl_loc);
> rr_inner = decode_field_reference (&rr_arg, &rr_bitsize, &rr_bitpos,
> &rr_unsignedp, &rr_reversep, &volatilep,
> - &rr_and_mask, &r_xor, 0,
> + &rr_and_mask, &rr_signbit, &r_xor, 0,
> &rr_load, rr_loc);
>
> /* It must be true that the inner operation on the lhs of each
> @@ -8156,6 +8173,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
> std::swap (rl_unsignedp, rr_unsignedp);
> std::swap (rl_reversep, rr_reversep);
> std::swap (rl_and_mask, rr_and_mask);
> + std::swap (rl_signbit, rr_signbit);
> std::swap (rl_load, rr_load);
> std::swap (rl_loc, rr_loc);
> }
> @@ -8165,6 +8183,37 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
> : (!ll_load != !rl_load))
> return 0;
>
> + /* ??? Can we do anything with these? */
> + if (lr_signbit || rr_signbit)
> + return 0;
> +
> + /* If the mask encompassed extensions of the sign bit before
> + clipping, try to include the sign bit in the test. If we're not
> + comparing with zero, don't even try to deal with it (for now?).
> + If we've already committed to a sign test, the extended (before
> + clipping) mask could already be messing with it. */
> + if (ll_signbit)
> + {
> + if (!integer_zerop (lr_arg) || lsignbit)
> + return 0;
> + wide_int sign = wi::mask (ll_bitsize - 1, true, ll_bitsize);
> + if (!ll_and_mask.get_precision ())
> + ll_and_mask = sign;
> + else
> + ll_and_mask |= sign;
> + }
> +
> + if (rl_signbit)
> + {
> + if (!integer_zerop (rr_arg) || rsignbit)
> + return 0;
> + wide_int sign = wi::mask (rl_bitsize - 1, true, rl_bitsize);
> + if (!rl_and_mask.get_precision ())
> + rl_and_mask = sign;
> + else
> + rl_and_mask |= sign;
> + }
> +
> if (TREE_CODE (lr_arg) == INTEGER_CST
> && TREE_CODE (rr_arg) == INTEGER_CST)
> {
> diff --git a/gcc/testsuite/gcc.dg/field-merge-16.c b/gcc/testsuite/gcc.dg/field-merge-16.c
> new file mode 100644
> index 0000000000000..2ca23ea663a45
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/field-merge-16.c
> @@ -0,0 +1,66 @@
> +/* { dg-do run } */
> +/* { dg-options "-O -fdump-tree-ifcombine-details" } */
> +
> +/* Check that tests for sign-extension bits are handled correctly. */
> +
> +struct s {
> + short a;
> + short b;
> + unsigned short c;
> + unsigned short d;
> +} __attribute__ ((aligned (8)));
> +
> +struct s p = { -1, 0, 0, 0 };
> +struct s q = { 0, -1, 0, 0 };
> +struct s r = { 1, 1, 0, 0 };
> +
> +const long long mask = 1ll << (sizeof (long long) * __CHAR_BIT__ - 5);
> +
> +int fp ()
> +{
> + if ((p.a & mask)
> + || (p.c & mask)
> + || p.d
> + || (p.b & mask))
> + return 1;
> + else
> + return -1;
> +}
> +
> +int fq ()
> +{
> + if ((q.a & mask)
> + || (q.c & mask)
> + || q.d
> + || (q.b & mask))
> + return 1;
> + else
> + return -1;
> +}
> +
> +int fr ()
> +{
> + if ((r.a & mask)
> + || (r.c & mask)
> + || r.d
> + || (r.b & mask))
> + return 1;
> + else
> + return -1;
> +}
> +
> +int main () {
> + /* Unlikely, but play safe. */
> + if (sizeof (long long) == sizeof (short))
> + return 0;
> + if (fp () < 0
> + || fq () < 0
> + || fr () > 0)
> + __builtin_abort ();
> + return 0;
> +}
> +
> +/* We test .b after other fields instead of right after .a to give field
> + merging a chance, otherwise the masked compares with zero are combined by
> + other ifcombine logic. The .c test is discarded by earlier optimizers. */
> +/* { dg-final { scan-tree-dump-times "optimizing" 6 "ifcombine" } } */
> diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> index 02c2f5a29b561..c9399a1069452 100644
> --- a/gcc/tree-ssa-ifcombine.cc
> +++ b/gcc/tree-ssa-ifcombine.cc
> @@ -899,7 +899,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
> else if (bits1 == name2)
> std::swap (name1, bits1);
> else
> - return false;
> + goto bits_test_failed;
>
> /* As we strip non-widening conversions in finding a common
> name that is tested make sure to end up with an integral
> @@ -944,7 +944,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
> }
>
> /* See if we have two comparisons that we can merge into one. */
> - else if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
> + else bits_test_failed:
> + if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
> && TREE_CODE_CLASS (gimple_cond_code (outer_cond)) == tcc_comparison)
> {
> tree t, ts = NULL_TREE;
>
>
> --
> Alexandre Oliva, happy hacker https://FSFLA.org/blogs/lxo/
> Free Software Activist GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving ""normal"" is *not* inclusive