The following fixes the handling of the zero special value, which was
broken because the CLZ bits bias was applied first.  That works for
the special case that uses a bit-and, but not when a conditional move
is used.  Apply the bias after dealing with the zero value instead,
where it also folds more easily with an existing bias we compensate for.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR tree-optimization/122212
* tree-ssa-forwprop.cc (simplify_count_zeroes): Apply
bias for CLZ after dealing with the zero special value.
* gcc.dg/torture/pr122212.c: New testcase.
---
gcc/testsuite/gcc.dg/torture/pr122212.c | 28 +++++++++++++++++++++++++
gcc/tree-ssa-forwprop.cc | 23 ++++++++++----------
2 files changed, 40 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/torture/pr122212.c
diff --git a/gcc/testsuite/gcc.dg/torture/pr122212.c b/gcc/testsuite/gcc.dg/torture/pr122212.c
new file mode 100644
index 00000000000..01a66313bbb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr122212.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+uint32_t __attribute__((noipa))
+ZSTD_countLeadingZeros32_fallback(uint32_t val)
+{
+ static const uint32_t DeBruijnClz[32]
+ = { 0, 9, 1, 10, 13, 21, 2, 29,
+ 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7,
+ 19, 27, 23, 6, 26, 5, 4, 31};
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
+}
+
+int main()
+{
+ if (ZSTD_countLeadingZeros32_fallback (0) != 31)
+ __builtin_abort ();
+ if (ZSTD_countLeadingZeros32_fallback (-1U) != 0)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index ee3bb401f31..749708f05a2 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3295,17 +3295,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
gimple_seq_add_stmt (&seq, call);
tree prev_lhs = gimple_call_lhs (call);
- if (fn == IFN_CLZ)
- {
- g = gimple_build_assign (make_ssa_name (integer_type_node),
- MINUS_EXPR,
- build_int_cst (integer_type_node,
- input_bits - 1),
- prev_lhs);
- gimple_set_location (g, gimple_location (stmt));
- gimple_seq_add_stmt (&seq, g);
- prev_lhs = gimple_assign_lhs (g);
- }
if (zero_ok && zero_val == ctz_val)
;
@@ -3337,6 +3326,18 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
prev_lhs = gimple_assign_lhs (g);
}
+ if (fn == IFN_CLZ)
+ {
+ g = gimple_build_assign (make_ssa_name (integer_type_node),
+ MINUS_EXPR,
+ build_int_cst (integer_type_node,
+ input_bits - 1),
+ prev_lhs);
+ gimple_set_location (g, gimple_location (stmt));
+ gimple_seq_add_stmt (&seq, g);
+ prev_lhs = gimple_assign_lhs (g);
+ }
+
g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
gimple_seq_add_stmt (&seq, g);
gsi_replace_with_seq (gsi, seq, true);
--
2.51.0