https://gcc.gnu.org/g:5ab4db545906270de5dc2a21447392ac3332cf2b
commit r16-4319-g5ab4db545906270de5dc2a21447392ac3332cf2b
Author: Richard Biener <[email protected]>
Date: Thu Oct 9 09:06:27 2025 +0200

    tree-optimization/122212 - fix CLZ detection

    The following corrects a mistake with the zero value handling
    which was broken because the bits bias was applied first which
    works for the special-case using a bit-and but not when using
    a conditional move.  Apply this after the fact instead where
    it also more easily folds with an existing bias we compensate.

            PR tree-optimization/122212
            * tree-ssa-forwprop.cc (simplify_count_zeroes): Apply bias
            for CLZ after dealing with the zero special value.

            * gcc.dg/torture/pr122212.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr122212.c | 28 ++++++++++++++++++++++++++++
 gcc/tree-ssa-forwprop.cc                | 23 ++++++++++++-----------
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr122212.c b/gcc/testsuite/gcc.dg/torture/pr122212.c
new file mode 100644
index 000000000000..01a66313bbb8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr122212.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+uint32_t __attribute__((noipa))
+ZSTD_countLeadingZeros32_fallback(uint32_t val)
+{
+  static const uint32_t DeBruijnClz[32]
+    = { 0, 9, 1, 10, 13, 21, 2, 29,
+        11, 14, 16, 18, 22, 25, 3, 30,
+        8, 12, 20, 28, 15, 17, 24, 7,
+        19, 27, 23, 6, 26, 5, 4, 31};
+  val |= val >> 1;
+  val |= val >> 2;
+  val |= val >> 4;
+  val |= val >> 8;
+  val |= val >> 16;
+  return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
+}
+
+int main()
+{
+  if (ZSTD_countLeadingZeros32_fallback (0) != 31)
+    __builtin_abort ();
+  if (ZSTD_countLeadingZeros32_fallback (-1U) != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index ee3bb401f31a..749708f05a29 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3295,17 +3295,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
       gimple_seq_add_stmt (&seq, call);
 
       tree prev_lhs = gimple_call_lhs (call);
-      if (fn == IFN_CLZ)
-        {
-          g = gimple_build_assign (make_ssa_name (integer_type_node),
-                                   MINUS_EXPR,
-                                   build_int_cst (integer_type_node,
-                                                  input_bits - 1),
-                                   prev_lhs);
-          gimple_set_location (g, gimple_location (stmt));
-          gimple_seq_add_stmt (&seq, g);
-          prev_lhs = gimple_assign_lhs (g);
-        }
 
       if (zero_ok && zero_val == ctz_val)
         ;
@@ -3337,6 +3326,18 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
           prev_lhs = gimple_assign_lhs (g);
         }
 
+      if (fn == IFN_CLZ)
+        {
+          g = gimple_build_assign (make_ssa_name (integer_type_node),
+                                   MINUS_EXPR,
+                                   build_int_cst (integer_type_node,
+                                                  input_bits - 1),
+                                   prev_lhs);
+          gimple_set_location (g, gimple_location (stmt));
+          gimple_seq_add_stmt (&seq, g);
+          prev_lhs = gimple_assign_lhs (g);
+        }
+
       g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
       gimple_seq_add_stmt (&seq, g);
       gsi_replace_with_seq (gsi, seq, true);
