https://gcc.gnu.org/g:659f4d0e1e9a740fa8a18636bd7ec91cfefee26c

commit r16-6740-g659f4d0e1e9a740fa8a18636bd7ec91cfefee26c
Author: Robin Dapp <[email protected]>
Date:   Fri Jan 9 13:25:40 2026 +0100

    rtlanal: Determine nonzero bits of popcount from operand [PR123501].
    
    The PR involves large mask vectors (e.g. V128BI) from which we take
    the popcount.  Currently a (popcount:DI (V128BI)) is assumed to have
    at most 7 nonzero bits (mask 0x7f) because we assume the popcount
    operand also has DImode, although the result can be as large as 128.
    
    This patch uses the operand mode for unary operations and thus
    calculates a proper nonzero-bits mask.
    
    We could do the same estimate for ctz and clz but they use nonzero in a
    non-poly way and I didn't want to change more than necessary.  Therefore
    the patch just returns -1 when we have a different operand mode for
    ctz/clz.
    
            PR rtl-optimization/123501
            PR rtl-optimization/123444
    
    gcc/ChangeLog:
    
            * rtlanal.cc (nonzero_bits1): Use operand mode instead of
            operation mode.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/reduc/pr123501.c: New test.

Diff:
---
 gcc/rtlanal.cc                                     | 35 ++++++++++++++++++----
 .../gcc.target/riscv/rvv/autovec/reduc/pr123501.c  | 21 +++++++++++++
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index cac82e491113..27349a0a74f5 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -4795,6 +4795,24 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode, const_rtx known_x,
 
   unsigned int mode_width = GET_MODE_PRECISION (mode);
 
+  /* For unary ops like ffs or popcount we want to determine the number of
+     nonzero bits from the operand.  This only matters with very large
+     vector modes.  A
+       (popcount:DI (V128BI))
+     should not get a nonzero-bit mask of (1 << 7) - 1 as that could
+     lead to incorrect optimizations based on it, see PR123501.  */
+  unsigned int op_mode_width = mode_width;
+  machine_mode op_mode = mode;
+  if (UNARY_P (x))
+    {
+      const_rtx op = XEXP (x, 0);
+      if (GET_MODE_PRECISION (GET_MODE (op)).is_constant ())
+       {
+         op_mode = GET_MODE (op);
+         op_mode_width = GET_MODE_PRECISION (op_mode).to_constant ();
+       }
+    }
+
   if (CONST_INT_P (x))
     {
       if (SHORT_IMMEDIATES_SIGN_EXTEND
@@ -5198,13 +5216,16 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode, const_rtx known_x,
     case FFS:
     case POPCOUNT:
       /* This is at most the number of bits in the mode.  */
-      nonzero = (HOST_WIDE_INT_UC (2) << (floor_log2 (mode_width))) - 1;
+      nonzero = (HOST_WIDE_INT_UC (2) << (floor_log2 (op_mode_width))) - 1;
       break;
 
     case CLZ:
       /* If CLZ has a known value at zero, then the nonzero bits are
-        that value, plus the number of bits in the mode minus one.  */
-      if (CLZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
+        that value, plus the number of bits in the mode minus one.
+        If we have a different operand mode, don't try to get nonzero
+        bits as currently nonzero is not a poly_int.  */
+      if (op_mode == mode
+         && CLZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
        nonzero
          |= (HOST_WIDE_INT_1U << (floor_log2 (mode_width))) - 1;
       else
@@ -5213,8 +5234,10 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode, const_rtx known_x,
 
     case CTZ:
       /* If CTZ has a known value at zero, then the nonzero bits are
-        that value, plus the number of bits in the mode minus one.  */
-      if (CTZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
+        that value, plus the number of bits in the mode minus one.
+        As for CLZ, give up when the operand mode differs from MODE.  */
+      if (op_mode == mode
+         && CTZ_DEFINED_VALUE_AT_ZERO (mode, nonzero))
         nonzero
           |= (HOST_WIDE_INT_1U << (floor_log2 (mode_width))) - 1;
       else
@@ -5223,7 +5246,7 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode, const_rtx known_x,
 
     case CLRSB:
       /* This is at most the number of bits in the mode minus 1.  */
-      nonzero = (HOST_WIDE_INT_1U << (floor_log2 (mode_width))) - 1;
+      nonzero = (HOST_WIDE_INT_1U << (floor_log2 (op_mode_width))) - 1;
       break;
 
     case PARITY:
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/pr123501.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/pr123501.c
new file mode 100644
index 000000000000..a0a454095ac1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/pr123501.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } */
+
+long long b;
+_Bool a=1;
+char e[13];
+
+int main() {
+  for (long h=0; h<13; ++h)
+    e[h] = 110;  /* Every element of e is nonzero.  */
+  for (int i=5; i<9; i++)
+    for (int k=0; k<1031; k++) {
+        int l = e[0] ? e[i] : 0;  /* e[0] and e[i] are both 110.  */
+        a = l ? a:l;  /* Leaves a unchanged while l is nonzero.  */
+    }
+  b = (int)a;
+  if (b != 1)  /* a starts as 1 and must still be 1 here.  */
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-assembler-times "vcpop.m" 2 } } */

Reply via email to