Hi,

PR87528 showed a case where a popcount expanded to a libgcc call
causes a performance regression on Skylake.
We also have PR86677, where the kernel build fails because the kernel
does not use libgcc (and the backend does not define a popcount
pattern).  While I agree that the kernel should implement its own
functionality when it is not using libgcc, I am afraid that such an
implementation can have the same performance issues reported for
Skylake in PR87528.

Therefore, I would like to propose that we disable popcount detection
when the backend does not provide a popcount pattern.  The attached
patch (based on previous discussions) does this.

Bootstrapped and regression tested on x86_64-linux-gnu with no new
regressions.  We need to disable the popcount* testcases; for that I
will have to define an effective_target_with_popcount in
gcc/testsuite/lib/target-supports.exp.  Is this patch OK?

Thanks,
Kugan


gcc/ChangeLog:

2018-10-25  Kugan Vivekanandarajah  <kug...@linaro.org>

    * tree-scalar-evolution.c (expression_expensive_p): Treat the popcount
    builtins as expensive when the backend does not define a popcount pattern.


gcc/testsuite/ChangeLog:

2018-10-25  Kugan Vivekanandarajah  <kug...@linaro.org>

    * gcc.target/aarch64/popcount4.c: New test.
From 1cf48663a678def7eb7f464ca4dbadd7e7311155 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
Date: Wed, 24 Oct 2018 20:33:50 +1100
Subject: [PATCH] fix kernel build

Change-Id: I1ac6d419419c1e87981f7c15916c313a11a23d97
---
 gcc/testsuite/gcc.target/aarch64/popcount4.c | 14 ++++++++++++++
 gcc/tree-scalar-evolution.c                  | 20 ++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/popcount4.c

diff --git a/gcc/testsuite/gcc.target/aarch64/popcount4.c b/gcc/testsuite/gcc.target/aarch64/popcount4.c
new file mode 100644
index 0000000..ee55b2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcount4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -mgeneral-regs-only" } */
+
+int PopCount (long b) {
+    int c = 0;
+
+    while (b) {
+	b &= b - 1;
+	c++;
+    }
+    return c;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "optimized" } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 6475743..3dcb0d5 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -257,7 +257,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "target.h"
 #include "rtl.h"
+#include "optabs-query.h"
 #include "tree.h"
 #include "gimple.h"
 #include "ssa.h"
@@ -282,6 +284,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "tree-into-ssa.h"
 #include "builtins.h"
+#include "case-cfn-macros.h"
 
 static tree analyze_scalar_evolution_1 (struct loop *, tree);
 static tree analyze_scalar_evolution_for_address_of (struct loop *loop,
@@ -3500,6 +3503,23 @@ expression_expensive_p (tree expr)
     {
       tree arg;
       call_expr_arg_iterator iter;
+      tree fndecl = get_callee_fndecl (expr);
+
+      if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+	{
+	  combined_fn cfn = as_combined_fn (DECL_FUNCTION_CODE (fndecl));
+	  switch (cfn)
+	    {
+	    CASE_CFN_POPCOUNT:
+	      /* Check if opcode for popcount is available.  */
+	      if (optab_handler (popcount_optab,
+				 TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0))))
+		  == CODE_FOR_nothing)
+		return true;
+	    default:
+	      break;
+	    }
+	}
 
       if (!is_inexpensive_builtin (get_callee_fndecl (expr)))
 	return true;
-- 
2.7.4

Reply via email to