The following corrects a mistake in the zero-value handling, which was
broken because the bits bias was applied first.  That works for the
special case using a bit-and, but not when using a conditional move.
Apply the bias after the fact instead, where it also folds more easily
with an existing bias we compensate for.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/122212
        * tree-ssa-forwprop.cc (simplify_count_zeroes): Apply
        bias for CLZ after dealing with the zero special value.

        * gcc.dg/torture/pr122212.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr122212.c | 28 +++++++++++++++++++++++++
 gcc/tree-ssa-forwprop.cc                | 23 ++++++++++----------
 2 files changed, 40 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr122212.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr122212.c b/gcc/testsuite/gcc.dg/torture/pr122212.c
new file mode 100644
index 00000000000..01a66313bbb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr122212.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+uint32_t __attribute__((noipa))
+ZSTD_countLeadingZeros32_fallback(uint32_t val)
+{
+  static const uint32_t DeBruijnClz[32]
+    = { 0, 9, 1, 10, 13, 21, 2, 29,
+        11, 14, 16, 18, 22, 25, 3, 30,
+        8, 12, 20, 28, 15, 17, 24, 7,
+        19, 27, 23, 6, 26, 5, 4, 31};
+  val |= val >> 1;
+  val |= val >> 2;
+  val |= val >> 4;
+  val |= val >> 8;
+  val |= val >> 16;
+  return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
+}
+
+int main()
+{
+  if (ZSTD_countLeadingZeros32_fallback (0) != 31)
+    __builtin_abort ();
+  if (ZSTD_countLeadingZeros32_fallback (-1U) != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index ee3bb401f31..749708f05a2 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3295,17 +3295,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
   gimple_seq_add_stmt (&seq, call);
 
   tree prev_lhs = gimple_call_lhs (call);
-  if (fn == IFN_CLZ)
-    {
-      g = gimple_build_assign (make_ssa_name (integer_type_node),
-                              MINUS_EXPR,
-                              build_int_cst (integer_type_node,
-                                             input_bits - 1),
-                              prev_lhs);
-      gimple_set_location (g, gimple_location (stmt));
-      gimple_seq_add_stmt (&seq, g);
-      prev_lhs = gimple_assign_lhs (g);
-    }
 
   if (zero_ok && zero_val == ctz_val)
     ;
@@ -3337,6 +3326,18 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
       prev_lhs = gimple_assign_lhs (g);
     }
 
+  if (fn == IFN_CLZ)
+    {
+      g = gimple_build_assign (make_ssa_name (integer_type_node),
+                              MINUS_EXPR,
+                              build_int_cst (integer_type_node,
+                                             input_bits - 1),
+                              prev_lhs);
+      gimple_set_location (g, gimple_location (stmt));
+      gimple_seq_add_stmt (&seq, g);
+      prev_lhs = gimple_assign_lhs (g);
+    }
+
   g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
   gimple_seq_add_stmt (&seq, g);
   gsi_replace_with_seq (gsi, seq, true);
-- 
2.51.0

Reply via email to