From 79355d876503558f661b46ebbeaa11c74ce176cb Mon Sep 17 00:00:00 2001
From: Jennifer Schmitz <jschmitz@nvidia.com>
Date: Thu, 15 Aug 2024 05:42:06 -0700
Subject: [PATCH 1/2] SVE intrinsics: Fold constant operands for svdiv

This patch implements constant folding for svdiv. It adds a new
gimple_folder method that uses const_binop to fold binary operations
with a given tree_code; svdiv_impl::fold uses this method to fold
constant operands.
Additionally, if at least one of the operands is a zero vector, svdiv
is folded to a zero vector (for ptrue, _x, or _z predication).
Tests were added that check the produced assembly for different
predicates and for signed and unsigned integers.
Currently, constant folding is only implemented for integers and
binary operations; extending it to float types and other operations
is planned for a future follow-up.
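
As an illustration (a minimal sketch based on the new
const_fold_div_1.c and const_fold_div_zero.c tests; the function names
f1/f2 are only for this example):

  #include "arm_sve.h"

  svint64_t f1 (svbool_t pg)
  {
    return svdiv_x (pg, svdup_s64 (5), svdup_s64 (3));  /* constant operands */
  }

  svint64_t f2 (svbool_t pg, svint64_t op2)
  {
    return svdiv_x (pg, svdup_s64 (0), op2);  /* zero dividend */
  }

With the patch, f1 is folded at gimple level and compiles to a single
constant move of #1 into the result register, and f2 is folded to a
zero vector move, instead of emitting an sdiv.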

The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/

	* config/aarch64/aarch64-sve-builtins-base.cc
	(svdiv_impl::fold): Add constant folding.
	* config/aarch64/aarch64-sve-builtins.cc
	(gimple_folder::const_fold): New method.
	* config/aarch64/aarch64-sve-builtins.h
	(gimple_folder::const_fold): Add function declaration.

gcc/testsuite/

	* gcc.target/aarch64/sve/const_fold_div_1.c: New test.
	* gcc.target/aarch64/sve/const_fold_div_zero.c: Likewise.
	* gcc.target/aarch64/sve/const_fold_mul_zero.c: Likewise.
---
 .../aarch64/aarch64-sve-builtins-base.cc      |  30 ++-
 gcc/config/aarch64/aarch64-sve-builtins.cc    |  25 +++
 gcc/config/aarch64/aarch64-sve-builtins.h     |   1 +
 .../gcc.target/aarch64/sve/const_fold_div_1.c | 128 ++++++++++++
 .../aarch64/sve/const_fold_div_zero.c         | 186 ++++++++++++++++++
 .../aarch64/sve/const_fold_mul_zero.c         |  95 +++++++++
 6 files changed, 462 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index d55bee0b72f..7f948ecc0c7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -755,8 +755,32 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    tree divisor = gimple_call_arg (f.call, 2);
-    tree divisor_cst = uniform_integer_cst_p (divisor);
+    tree pg = gimple_call_arg (f.call, 0);
+    tree op1 = gimple_call_arg (f.call, 1);
+    tree op2 = gimple_call_arg (f.call, 2);
+
+    /* For integer division, if the dividend or the divisor is all zeros,
+       fold to a zero vector.  */
+    int step = f.type_suffix (0).element_bytes;
+    if (f.pred != PRED_m || is_ptrue (pg, step))
+      {
+	if (vector_cst_all_same (op1, step)
+	    && integer_zerop (VECTOR_CST_ENCODED_ELT (op1, 0)))
+	  return gimple_build_assign (f.lhs, op1);
+	if (vector_cst_all_same (op2, step)
+	    && integer_zerop (VECTOR_CST_ENCODED_ELT (op2, 0)))
+	  return gimple_build_assign (f.lhs, op2);
+      }
+
+    /* Try to fold constant operands.  */
+    tree_code code
+      = f.type_suffix (0).integer_p ? TRUNC_DIV_EXPR : RDIV_EXPR;
+    if (gimple *new_stmt = f.const_fold (code))
+      return new_stmt;
+
+    /* If the divisor is a uniform power of 2, fold to a shift
+       instruction.  */
+    tree divisor_cst = uniform_integer_cst_p (op2);
 
     if (!divisor_cst || !integer_pow2p (divisor_cst))
       return NULL;
@@ -770,7 +794,7 @@ public:
 				    shapes::binary_uint_opt_n, MODE_n,
 				    f.type_suffix_ids, GROUP_none, f.pred);
 	call = f.redirect_call (instance);
-	tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+	tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
 	new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
       }
     else
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 0a560eaedca..0f69c586464 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -3691,6 +3691,31 @@ gimple_folder::fold_to_vl_pred (unsigned int vl)
   return gimple_build_assign (lhs, builder.build ());
 }
 
+/* For integer types, if the predicate is svptrue or the predication is _x,
+   try to fold the call to a constant using the given tree_code.
+   Return the new statement on success, otherwise return null.  */
+gimple *
+gimple_folder::const_fold (tree_code code)
+{
+  tree pg = gimple_call_arg (call, 0);
+  if (type_suffix (0).integer_p
+      && (is_ptrue (pg, type_suffix (0).element_bytes)
+	  || pred == PRED_x))
+    {
+      if (TREE_CODE_CLASS (code) == tcc_binary)
+	{
+	  gcc_assert (gimple_call_num_args (call) == 3);
+	  tree op1 = gimple_call_arg (call, 1);
+	  tree op2 = gimple_call_arg (call, 2);
+	  if (TREE_TYPE (op1) != TREE_TYPE (op2))
+	    return NULL;
+	  if (tree res = const_binop (code, TREE_TYPE (lhs), op1, op2))
+	    return gimple_build_assign (lhs, res);
+	}
+    }
+  return NULL;
+}
+
 /* Try to fold the call.  Return the new statement on success and null
    on failure.  */
 gimple *
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 9ab6f202c30..db30225a008 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -636,6 +636,7 @@ public:
   gimple *fold_to_pfalse ();
   gimple *fold_to_ptrue ();
   gimple *fold_to_vl_pred (unsigned int);
+  gimple *const_fold (tree_code);
 
   gimple *fold ();
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
new file mode 100644
index 00000000000..d8460a4d336
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
@@ -0,0 +1,128 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svint64_t s64_x_pg (svbool_t pg)
+{
+  return svdiv_x (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_z_pg:
+**	mov	z[0-9]+\.d, p[0-7]/z, #1
+**	ret
+*/
+svint64_t s64_z_pg (svbool_t pg)
+{
+  return svdiv_z (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_m_pg:
+**	mov	(z[0-9]+\.d), #3
+**	mov	(z[0-9]+\.d), #5
+**	sdiv	\2, p[0-7]/m, \2, \1
+**	ret
+*/
+svint64_t s64_m_pg (svbool_t pg)
+{
+  return svdiv_m (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_x_ptrue:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svint64_t s64_x_ptrue ()
+{
+  return svdiv_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_z_ptrue:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svint64_t s64_z_ptrue ()
+{
+  return svdiv_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_m_ptrue:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svint64_t s64_m_ptrue ()
+{
+  return svdiv_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** u64_x_pg:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svuint64_t u64_x_pg (svbool_t pg)
+{
+  return svdiv_x (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_z_pg:
+**	mov	z[0-9]+\.d, p[0-7]/z, #1
+**	ret
+*/
+svuint64_t u64_z_pg (svbool_t pg)
+{
+  return svdiv_z (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_m_pg:
+**	mov	(z[0-9]+\.d), #3
+**	mov	(z[0-9]+\.d), #5
+**	udiv	\2, p[0-7]/m, \2, \1
+**	ret
+*/
+svuint64_t u64_m_pg (svbool_t pg)
+{
+  return svdiv_m (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_x_ptrue:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svuint64_t u64_x_ptrue ()
+{
+  return svdiv_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_z_ptrue:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svuint64_t u64_z_ptrue ()
+{
+  return svdiv_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_m_ptrue:
+**	mov	z[0-9]+\.d, #1
+**	ret
+*/
+svuint64_t u64_m_ptrue ()
+{
+  return svdiv_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c
new file mode 100644
index 00000000000..00d14a46ced
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c
@@ -0,0 +1,186 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svdiv_x (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_pg_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svdiv_z (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_pg_op1:
+**	mov	z[0-9]+\.d, p[0-7]/z, #0
+**	ret
+*/
+svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svdiv_m (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_pg_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svdiv_x (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_pg_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svdiv_z (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_pg_op2:
+**	mov	(z[0-9]+)\.b, #0
+**	sdiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
+**	ret
+*/
+svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svdiv_m (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_ptrue_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_m_ptrue_op1 (svint64_t op2)
+{
+  return svdiv_m (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_ptrue_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_m_ptrue_op2 (svint64_t op1)
+{
+  return svdiv_m (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_ptrue_op1_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_m_ptrue_op1_op2 ()
+{
+  return svdiv_m (svptrue_b64 (), svdup_s64 (0), svdup_s64 (0));
+}
+
+/*
+** u64_x_pg_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svdiv_x (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_z_pg_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svdiv_z (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_pg_op1:
+**	mov	z[0-9]+\.d, p[0-7]/z, #0
+**	ret
+*/
+svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svdiv_m (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_x_pg_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svdiv_x (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_z_pg_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svdiv_z (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_pg_op2:
+**	mov	(z[0-9]+)\.b, #0
+**	udiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
+**	ret
+*/
+svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svdiv_m (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_ptrue_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svuint64_t u64_m_ptrue_op1 (svuint64_t op2)
+{
+  return svdiv_m (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_ptrue_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svuint64_t u64_m_ptrue_op2 (svuint64_t op1)
+{
+  return svdiv_m (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_ptrue_op1_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svuint64_t u64_m_ptrue_op1_op2 ()
+{
+  return svdiv_m (svptrue_b64 (), svdup_u64 (0), svdup_u64 (0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c
new file mode 100644
index 00000000000..793291449c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c
@@ -0,0 +1,95 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_x (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_pg_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_z (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_pg_op1:
+**	mov	z[0-9]+\.d, p[0-7]/z, #0
+**	ret
+*/
+svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_m (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_pg_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_x (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_pg_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_z (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_pg_op2:
+**	mov	(z[0-9]+)\.b, #0
+**	mul	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
+**	ret
+*/
+svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_m (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_ptrue_op1:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_m_ptrue_op1 (svint64_t op2)
+{
+  return svmul_m (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_ptrue_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_m_ptrue_op2 (svint64_t op1)
+{
+  return svmul_m (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_ptrue_op1_op2:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_m_ptrue_op1_op2 ()
+{
+  return svmul_m (svptrue_b64 (), svdup_s64 (0), svdup_s64 (0));
+}
-- 
2.44.0

