Hi All,

Here is a patch for popcount builtin detection similar to LLVM. I
would like to queue this for review for next stage 1.

1. This is done part of loop-distribution and effective for -O3 and above.
2. This does not distribute loop to detect popcount (like
memcpy/memmove). I dont think that happens in practice. Please correct
me if I am wrong.

Bootstrapped and regression tested on aarch64-linux-gnu with no new regressions.

Thanks,
Kugan

gcc/ChangeLog:

2018-01-25  Kugan Vivekanandarajah  <kug...@linaro.org>

    PR middle-end/82479
    * tree-loop-distribution.c (handle_popcount): New.
    (pass_loop_distribution::execute): Use handle_popcount.

gcc/testsuite/ChangeLog:

2018-01-25  Kugan Vivekanandarajah  <kug...@linaro.org>

    PR middle-end/82479
    * gcc.dg/tree-ssa/popcount.c: New test.
From 9fa09af4b7013c6207e59a4920c82f089bfe45c2 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org>
Date: Wed, 24 Jan 2018 08:50:08 +1100
Subject: [PATCH] pocount builtin detection

Change-Id: Ic6e175f9cc9a69bd417936a4845c2c046fd446b4

Change-Id: I680eb107445660c60a5d38f5d7300ab1a3243bf5

Change-Id: Ia9f0df89e05520091dc7797195098118768c7ac2
---
 gcc/testsuite/gcc.dg/tree-ssa/popcount.c |  41 +++++++++
 gcc/tree-loop-distribution.c             | 145 +++++++++++++++++++++++++++++++
 2 files changed, 186 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c
new file mode 100644
index 0000000..86a66cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern int foo (int);
+
+int PopCount (long b) {
+    int c = 0;
+    b++;
+
+    while (b) {
+	b &= b - 1;
+	c++;
+    }
+    return c;
+}
+int PopCount2 (long b) {
+    int c = 0;
+
+    while (b) {
+	b &= b - 1;
+	c++;
+    }
+    foo (c);
+    return foo (c);
+}
+
+void PopCount3 (long b1) {
+
+    for (long i = 0; i < b1; ++i)
+      {
+	long b = i;
+	int c = 0;
+	while (b) {
+	    b &= b - 1;
+	    c++;
+	}
+	foo (c);
+      }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcount" 3 "optimized" } } */
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index a3d76e4..1060700 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1585,6 +1585,148 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
   return;
 }
 
+/* See if loop is a popcout implementation of the form
+
+    int c = 0;
+    while (b) {
+	b = b & (b - 1);
+	c++;
+    }
+
+    If so, convert this into c = __builtin_popcount (b)
+    return true if we did, false otherwise.  */
+
+
+static bool
+handle_popcount (loop_p loop)
+{
+  tree lhs, rhs;
+  tree dest, src;
+  gimple *and_minus_one;
+  int count = 0;
+  gphi *count_phi;
+  gimple *fn_call;
+  gimple *use_stmt;
+  use_operand_p use_p;
+  imm_use_iterator iter;
+  gimple_stmt_iterator gsi;
+
+  /* Check loop terminating branch is like
+     if (b != 0).  */
+  gimple *stmt = last_stmt (loop->header);
+  if (!stmt
+      || gimple_code (stmt) != GIMPLE_COND
+      || !zerop (gimple_cond_rhs (stmt)))
+    return false;
+
+  /* Cheeck "b = b & (b - 1)" is calculated.  */
+  lhs = gimple_cond_lhs (stmt);
+  gimple *and_stmt = SSA_NAME_DEF_STMT (lhs);
+  if (gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR)
+    return false;
+  lhs = gimple_assign_rhs1 (and_stmt);
+  rhs = gimple_assign_rhs2 (and_stmt);
+  if (TREE_CODE (lhs) == SSA_NAME
+      && (and_minus_one = SSA_NAME_DEF_STMT (lhs))
+      && is_gimple_assign (and_minus_one)
+      && (gimple_assign_rhs_code (and_minus_one) == PLUS_EXPR)
+      && integer_minus_onep (gimple_assign_rhs2 (and_minus_one)))
+      lhs = rhs;
+  else if (TREE_CODE (rhs) == SSA_NAME
+      && (and_minus_one = SSA_NAME_DEF_STMT (rhs))
+      && is_gimple_assign (and_minus_one)
+      && (gimple_assign_rhs_code (and_minus_one) == PLUS_EXPR)
+      && integer_minus_onep (gimple_assign_rhs2 (and_minus_one)))
+      ;
+  else
+    return false;
+  if ((gimple_assign_rhs1 (and_stmt) != gimple_assign_rhs1 (and_minus_one))
+      && (gimple_assign_rhs2 (and_stmt) != gimple_assign_rhs1 (and_minus_one)))
+    return false;
+
+  /* Check the recurrence.  */
+  gimple *phi = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (and_minus_one));
+  gimple *src_phi = SSA_NAME_DEF_STMT (lhs);
+  if (gimple_code (phi) != GIMPLE_PHI
+      || gimple_code (src_phi) != GIMPLE_PHI)
+    return false;
+
+  /* Check the loop closed SSA definition for just the variable c defined in
+     loop.  */
+  src = gimple_phi_arg_def (src_phi, loop_preheader_edge (loop)->dest_idx);
+  basic_block bb = single_exit (loop)->dest;
+  for (gphi_iterator gpi = gsi_start_phis (bb);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    {
+      count_phi = gpi.phi ();
+      count++;
+    }
+  if (count != 1)
+    return false;
+
+  /* Make sure we have a count by one and it starts from zero.  */
+  rhs = gimple_phi_arg_def (count_phi, 0);
+  stmt = SSA_NAME_DEF_STMT (rhs);
+  if  (!is_gimple_assign (stmt)
+      || (gimple_assign_rhs_code (stmt) != PLUS_EXPR)
+      || !integer_onep (gimple_assign_rhs2 (stmt)))
+    return false;
+  rhs = gimple_assign_rhs1 (stmt);
+  stmt = SSA_NAME_DEF_STMT (rhs);
+  if (gimple_code (stmt) != GIMPLE_PHI
+      || !integer_zerop (gimple_phi_arg_def (stmt, loop_preheader_edge (loop)->dest_idx)))
+    return false;
+
+  /* Create a var for builtin_popcount result and update the uses.  */
+  dest = gimple_phi_result (count_phi);
+  tree var = make_ssa_name (TREE_TYPE (dest), NULL);
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, dest)
+    {
+      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+	SET_USE (use_p, var);
+    }
+  dest = var;
+
+  /* Remove the loop closed PHI stmt.  */
+  gsi = gsi_after_labels (gimple_bb (count_phi));
+  gphi_iterator gsi_phi = gsi_for_phi (count_phi);
+  remove_phi_node (&gsi_phi, true);
+
+  /* Insert the builtin.  */
+  gsi = gsi_after_labels (loop_preheader_edge (loop)->src);
+  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
+				  false, GSI_CONTINUE_LINKING);
+  tree fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_POPCOUNT));
+  fn_call = gimple_build_call (fn, 1, src);
+  gimple_call_set_lhs (fn_call, dest);
+  gsi_insert_before (&gsi, fn_call, GSI_CONTINUE_LINKING);
+
+  /* In  most cases, we will have if (b != 0) preceeding the loop in
+     the form
+     if (b != 0)
+     {
+	loop;
+     }
+
+     Since __builtin_popcount (0) is defined, we can remove this condition
+     by making it always true.  */
+
+  stmt = last_stmt (EDGE_PRED (loop_preheader_edge (loop)->src, 0)->src);
+  if (stmt
+      && gimple_code (stmt) == GIMPLE_COND
+      && (gimple_cond_lhs (stmt) == src)
+      && zerop (gimple_cond_rhs (stmt)))
+    {
+      gimple_cond_set_lhs (as_a <gcond *> (stmt),
+			   build_int_cst (TREE_TYPE (src), 1));
+      update_stmt (stmt);
+    }
+
+  /* Finaly remove the loop.  */
+  destroy_loop (loop);
+  return true;
+}
+
 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
    For the moment we detect memset, memcpy and memmove patterns.  Bitmap
    STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.  */
@@ -3032,6 +3174,9 @@ pass_loop_distribution::execute (function *fun)
 	  || !optimize_loop_for_speed_p (loop))
 	continue;
 
+      if (handle_popcount (loop))
+	continue;
+
       /* Don't distribute loop if niters is unknown.  */
       tree niters = number_of_latch_executions (loop);
       if (niters == NULL_TREE || niters == chrec_dont_know)
-- 
2.7.4

Reply via email to