https://gcc.gnu.org/g:5ab4db545906270de5dc2a21447392ac3332cf2b

commit r16-4319-g5ab4db545906270de5dc2a21447392ac3332cf2b
Author: Richard Biener <[email protected]>
Date:   Thu Oct 9 09:06:27 2025 +0200

    tree-optimization/122212 - fix CLZ detection
    
    The following corrects a mistake in the zero-value handling, which
    was broken because the bits bias was applied first.  Applying the
    bias first works for the special case using a bit-and, but not when
    using a conditional move.  Apply the bias after the fact instead,
    where it also folds more easily with an existing bias we compensate for.
    
            PR tree-optimization/122212
            * tree-ssa-forwprop.cc (simplify_count_zeroes): Apply
            bias for CLZ after dealing with the zero special value.
    
            * gcc.dg/torture/pr122212.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr122212.c | 28 ++++++++++++++++++++++++++++
 gcc/tree-ssa-forwprop.cc                | 23 ++++++++++++-----------
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr122212.c b/gcc/testsuite/gcc.dg/torture/pr122212.c
new file mode 100644
index 000000000000..01a66313bbb8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr122212.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+uint32_t __attribute__((noipa))
+ZSTD_countLeadingZeros32_fallback(uint32_t val)
+{
+  static const uint32_t DeBruijnClz[32]
+    = { 0, 9, 1, 10, 13, 21, 2, 29,
+        11, 14, 16, 18, 22, 25, 3, 30,
+        8, 12, 20, 28, 15, 17, 24, 7,
+        19, 27, 23, 6, 26, 5, 4, 31};
+  val |= val >> 1;
+  val |= val >> 2;
+  val |= val >> 4;
+  val |= val >> 8;
+  val |= val >> 16;
+  return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
+}
+
+int main()
+{
+  if (ZSTD_countLeadingZeros32_fallback (0) != 31)
+    __builtin_abort ();
+  if (ZSTD_countLeadingZeros32_fallback (-1U) != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index ee3bb401f31a..749708f05a29 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3295,17 +3295,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
   gimple_seq_add_stmt (&seq, call);
 
   tree prev_lhs = gimple_call_lhs (call);
-  if (fn == IFN_CLZ)
-    {
-      g = gimple_build_assign (make_ssa_name (integer_type_node),
-                              MINUS_EXPR,
-                              build_int_cst (integer_type_node,
-                                             input_bits - 1),
-                              prev_lhs);
-      gimple_set_location (g, gimple_location (stmt));
-      gimple_seq_add_stmt (&seq, g);
-      prev_lhs = gimple_assign_lhs (g);
-    }
 
   if (zero_ok && zero_val == ctz_val)
     ;
@@ -3337,6 +3326,18 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
       prev_lhs = gimple_assign_lhs (g);
     }
 
+  if (fn == IFN_CLZ)
+    {
+      g = gimple_build_assign (make_ssa_name (integer_type_node),
+                              MINUS_EXPR,
+                              build_int_cst (integer_type_node,
+                                             input_bits - 1),
+                              prev_lhs);
+      gimple_set_location (g, gimple_location (stmt));
+      gimple_seq_add_stmt (&seq, g);
+      prev_lhs = gimple_assign_lhs (g);
+    }
+
   g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
   gimple_seq_add_stmt (&seq, g);
   gsi_replace_with_seq (gsi, seq, true);

Reply via email to