Hi, In r10-4525, and r10-4161, loop unroller was enabled for simply loops at -O2. At the same time, the GIMPLE cunroll is also enabled, while it is not only for simple loops. This patch introduces a hook to check if a loop is suitable to unroll completely. The hook can be used to check if a loop is simply enough for complete unroll, then avoid negative effects like size increasing or complex loop unrolling at -O2.
This patch could help to handle the code in PR95018 which contains complex loop, and does not cause obvious performance change on SPEC2017. Bootstrap/regtest pass on powerpc64le. OK for trunk? Jiufu 2020-05-20 Jiufu Guo <guoji...@cn.ibm.com> PR target/95018 * target.def (loop_allow_unroll_completely_peel): New hook. * doc/tm.texi (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook. * doc/tm.texi.in (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook. * config/rs6000/rs6000.c (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook. (rs6000_loop_allow_unroll_completely_peel): New hook implementation. * tree-ssa-loop-ivcanon.c (canonicalize_loop_induction_variables): Call hook loop_allow_unroll_completely_peel. --- gcc/config/rs6000/rs6000.c | 19 +++++++++++++++++++ gcc/doc/tm.texi | 9 +++++++++ gcc/doc/tm.texi.in | 2 ++ gcc/target.def | 12 ++++++++++++ gcc/tree-ssa-loop-ivcanon.c | 5 +++++ 5 files changed, 47 insertions(+) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8435bc15d72..a1a3f9cb583 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1433,6 +1433,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_LOOP_UNROLL_ADJUST #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust +#undef TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL +#define TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL rs6000_loop_allow_unroll_completely_peel + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins #undef TARGET_BUILTIN_DECL @@ -5139,6 +5142,22 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop) return nunroll; } +/* Implement targetm.loop_allow_unroll_completely_peel. */ + +static bool +rs6000_loop_allow_unroll_completely_peel (HOST_WIDE_INT maxiter, tree niter, + struct loop *loop) +{ + if (unroll_only_small_loops && optimize == 2) + { + if (maxiter >= 4 + && !(TREE_CODE (niter) == INTEGER_CST && single_exit (loop))) + return false; + } + + return true; +} + /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a library with vectorized intrinsics. */ diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 6e7d9dc54a9..f7419872c2f 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11893,6 +11893,15 @@ is required only when the target has special constraints like maximum number of memory accesses. @end deftypefn +@deftypefn {Target Hook} bool TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL (HOST_WIDE_INT @var{maxiter}, tree @var{niter}, class loop *@var{loop}) +This target hook check if the loop @var{loop} is suitable to unroll +completely on the target. The parameter @var{maxiter} is the possible max +bound of iterations. The parameter @var{niter} is the number expression of +iterations the loop is executed. The parameter @var{loop} is a pointer to +the loop. This target hook is required only when the target has special +constraints. +@end deftypefn + @defmac POWI_MAX_MULTS If defined, this macro is interpreted as a signed integer C expression that specifies the maximum number of floating point multiplications diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 3be984bbd5c..4ae079a650c 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -8010,6 +8010,8 @@ lists. @hook TARGET_LOOP_UNROLL_ADJUST +@hook TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL + @defmac POWI_MAX_MULTS If defined, this macro is interpreted as a signed integer C expression that specifies the maximum number of floating point multiplications diff --git a/gcc/target.def b/gcc/target.def index 07059a87caf..3842ba611a2 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -2709,6 +2709,18 @@ number of memory accesses.", unsigned, (unsigned nunroll, class loop *loop), NULL) +/* Return if a loop is suitable for complete unroll. */ +DEFHOOK +(loop_allow_unroll_completely_peel, + "This target hook check if the loop @var{loop} is suitable to unroll\n\ +completely on the target. The parameter @var{maxiter} is the possible max\n\ +bound of iterations. The parameter @var{niter} is the number expression of\n\ +iterations the loop is executed. The parameter @var{loop} is a pointer to\n\ +the loop. This target hook is required only when the target has special\n\ +constraints.", + bool, (HOST_WIDE_INT maxiter, tree niter, class loop *loop), + NULL) + /* True if X is a legitimate MODE-mode immediate operand. */ DEFHOOK (legitimate_constant_p, diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 8ab6ab3330c..52d45a90a56 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "backend.h" +#include "target.h" #include "tree.h" #include "gimple.h" #include "cfghooks.h" @@ -1231,6 +1232,10 @@ canonicalize_loop_induction_variables (class loop *loop, /* Force re-computation of loop bounds so we can remove redundant exits. */ maxiter = max_loop_iterations_int (loop); + if (allow_peel && targetm.loop_allow_unroll_completely_peel + && !targetm.loop_allow_unroll_completely_peel (maxiter, niter, loop)) + return false; + if (dump_file && (dump_flags & TDF_DETAILS) && TREE_CODE (niter) == INTEGER_CST) { -- 2.17.1