Hi, PR87528 showed a case where the generated libgcc popcount call causes a regression on Skylake. We also have PR86677, where the kernel build fails because the kernel does not use libgcc (when the backend does not define a popcount pattern). While I agree that the kernel should implement its own functionality when it is not using libgcc, I am afraid that such an implementation could have the same performance issues that were reported for Skylake in PR87528.
Therefore, I would like to propose that we disable popcount detection when the backend does not provide a pattern for it. The attached patch (based on previous discussions) does this. Bootstrapped and regression tested on x86_64-linux-gnu with no new regressions. We also need to disable the popcount* testcases; for that I will have to define an effective_target_with_popcount in gcc/testsuite/lib/target-supports.exp, if this patch is OK. Thanks, Kugan gcc/ChangeLog: 2018-10-25 Kugan Vivekanandarajah <kug...@linaro.org> * tree-scalar-evolution.c (expression_expensive_p): Treat BUILTIN POPCOUNT as expensive when the backend does not define it. gcc/testsuite/ChangeLog: 2018-10-25 Kugan Vivekanandarajah <kug...@linaro.org> * gcc.target/aarch64/popcount4.c: New test.
From 1cf48663a678def7eb7f464ca4dbadd7e7311155 Mon Sep 17 00:00:00 2001 From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> Date: Wed, 24 Oct 2018 20:33:50 +1100 Subject: [PATCH] fix kernel build Change-Id: I1ac6d419419c1e87981f7c15916c313a11a23d97 --- gcc/testsuite/gcc.target/aarch64/popcount4.c | 14 ++++++++++++++ gcc/tree-scalar-evolution.c | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/popcount4.c diff --git a/gcc/testsuite/gcc.target/aarch64/popcount4.c b/gcc/testsuite/gcc.target/aarch64/popcount4.c new file mode 100644 index 0000000..ee55b2e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/popcount4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized -mgeneral-regs-only" } */ + +int PopCount (long b) { + int c = 0; + + while (b) { + b &= b - 1; + c++; + } + return c; +} + +/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "optimized" } } */ diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c index 6475743..3dcb0d5 100644 --- a/gcc/tree-scalar-evolution.c +++ b/gcc/tree-scalar-evolution.c @@ -257,7 +257,9 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "backend.h" +#include "target.h" #include "rtl.h" +#include "optabs-query.h" #include "tree.h" #include "gimple.h" #include "ssa.h" @@ -282,6 +284,7 @@ along with GCC; see the file COPYING3. 
If not see #include "gimple-fold.h" #include "tree-into-ssa.h" #include "builtins.h" +#include "case-cfn-macros.h" static tree analyze_scalar_evolution_1 (struct loop *, tree); static tree analyze_scalar_evolution_for_address_of (struct loop *loop, @@ -3500,6 +3503,23 @@ expression_expensive_p (tree expr) { tree arg; call_expr_arg_iterator iter; + tree fndecl = get_callee_fndecl (expr); + + if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + combined_fn cfn = as_combined_fn (DECL_FUNCTION_CODE (fndecl)); + switch (cfn) + { + CASE_CFN_POPCOUNT: + /* Check if opcode for popcount is available. */ + if (optab_handler (popcount_optab, + TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0)))) + == CODE_FOR_nothing) + return true; + default: + break; + } + } if (!is_inexpensive_builtin (get_callee_fndecl (expr))) return true; -- 2.7.4