On Tue, Oct 27, 2020 at 6:32 AM Alexandre Oliva <[email protected]> wrote:
>
> On Oct 23, 2020, Richard Biener <[email protected]> wrote:
>
> > Can you move it one pass further after sink please?
>
> I did, but it didn't solve the recip regressions that my first attempt
> brought about.
>
> > Also I don't
> > remember exactly but does pass_sincos only handle sin/cos unifying?
>
> It rearranges some powi computations, and that's what breaks recip. It
> adds a copy of y*y in extract_recip_[34], we'd need a forwprop or
> similar to get rid of the trivial copy before recip could do its job
> properly again.
>
> So I figured I'd try to cse type conversions before sincos, and that
> turned out to be pretty easy, and it didn't regress anything.
>
> Regstrapped on x86_64-linux-gnu. Ok to install?
>
>
> CSE conversions within sincos
>
> From: Alexandre Oliva <[email protected]>
>
> On platforms in which Aux_[Real_Type] involves non-NOP conversions
> (e.g., between single- and double-precision, or between short float
> and float), the conversions before the calls are CSEd too late for
> sincos to combine calls.
>
> This patch enables the sincos pass to CSE type casts used as arguments
> to eligible calls before looking for other calls using the same
> operand.
>
>
> for gcc/ChangeLog
>
> * tree-ssa-math-opts.c (sincos_stats): Rename inserted to
> sincos_inserted. Add conv_inserted.
> (maybe_record_sincos): Rename to...
> (maybe_record_stmt): ... this.
> (execute_cse_conv_1): New.
> (execute_cse_sincos_1): Call it. Adjust.
> (pass_cse_sincos::execute): Adjust. Report conv_inserted.
>
> for gcc/testsuite/ChangeLog
>
> * gnat.dg/sin_cos.ads: New.
> * gnat.dg/sin_cos.adb: New.
> * gcc.dg/sin_cos.c: New.
> ---
> gcc/testsuite/gcc.dg/sin_cos.c | 41 +++++++++++++
> gcc/testsuite/gnat.dg/sin_cos.adb | 14 ++++
> gcc/testsuite/gnat.dg/sin_cos.ads | 4 +
> gcc/tree-ssa-math-opts.c | 119
> +++++++++++++++++++++++++++++++++++--
> 4 files changed, 171 insertions(+), 7 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/sin_cos.c
> create mode 100644 gcc/testsuite/gnat.dg/sin_cos.adb
> create mode 100644 gcc/testsuite/gnat.dg/sin_cos.ads
>
> diff --git a/gcc/testsuite/gcc.dg/sin_cos.c b/gcc/testsuite/gcc.dg/sin_cos.c
> new file mode 100644
> index 00000000..aa71dca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sin_cos.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +/* This maps to essentially the same gimple that is generated for
> + gnat.dg/sin_cos.adb, on platforms that use the wraplf variant of
> + Ada.Numerics.Aux_Float. The value of EPSILON is not relevant to
> + the test, but the test must be there to keep the conversions in
> + different BBs long enough to trigger the problem that prevented the
> + sincos optimization, because the arguments passed to sin and cos
> + didn't get unified into a single SSA_NAME in time for sincos. */
> +
> +#include <math.h>
> +
> +#define EPSILON 3.4526697709225118160247802734375e-4
> +
> +static float my_sinf(float x) {
> + return (float) sin ((double) x);
> +}
> +
> +static float wrap_sinf(float x) {
> + if (fabs (x) < EPSILON)
> + return 0;
> + return my_sinf (x);
> +}
> +
> +static float my_cosf(float x) {
> + return (float) cos ((double) x);
> +}
> +
> +static float wrap_cosf(float x) {
> + if (fabs (x) < EPSILON)
> + return 1;
> + return my_cosf (x);
> +}
> +
> +float my_sin_cos(float x, float *s, float *c) {
> + *s = wrap_sinf (x);
> + *c = wrap_cosf (x);
> +}
> +
> +/* { dg-final { scan-assembler "sincos\|cexp" { target *-linux-gnu*
> *-w64-mingw* *-*-vxworks* } } } */
> diff --git a/gcc/testsuite/gnat.dg/sin_cos.adb
> b/gcc/testsuite/gnat.dg/sin_cos.adb
> new file mode 100644
> index 00000000..6e18df9
> --- /dev/null
> +++ b/gcc/testsuite/gnat.dg/sin_cos.adb
> @@ -0,0 +1,14 @@
> +-- { dg-do compile }
> +-- { dg-options "-O2 -gnatn" }
> +
> +with Ada.Numerics.Elementary_Functions;
> +use Ada.Numerics.Elementary_Functions;
> +package body Sin_Cos is
> + procedure Sin_Cos (Angle : T; SinA, CosA : out T) is
> + begin
> + SinA := Sin (Angle);
> + CosA := Cos (Angle);
> + end;
> +end Sin_Cos;
> +
> +-- { dg-final { scan-assembler "sincos\|cexp" { target *-linux-gnu*
> *-w64-mingw* *-*-vxworks* } } }
> diff --git a/gcc/testsuite/gnat.dg/sin_cos.ads
> b/gcc/testsuite/gnat.dg/sin_cos.ads
> new file mode 100644
> index 00000000..a0eff3d
> --- /dev/null
> +++ b/gcc/testsuite/gnat.dg/sin_cos.ads
> @@ -0,0 +1,4 @@
> +package Sin_Cos is
> + subtype T is Float;
> + procedure Sin_Cos (Angle : T; SinA, CosA : out T);
> +end Sin_Cos;
> diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
> index 90dfb98..a32f5ca 100644
> --- a/gcc/tree-ssa-math-opts.c
> +++ b/gcc/tree-ssa-math-opts.c
> @@ -186,7 +186,11 @@ static struct
> static struct
> {
> /* Number of cexpi calls inserted. */
> - int inserted;
> + int sincos_inserted;
> +
> + /* Number of conversions inserted. */
> + int conv_inserted;
> +
> } sincos_stats;
>
> static struct
> @@ -1106,7 +1110,7 @@ make_pass_cse_reciprocals (gcc::context *ctxt)
> statements in the vector. */
>
> static bool
> -maybe_record_sincos (vec<gimple *> *stmts,
> +maybe_record_stmt (vec<gimple *> *stmts,
> basic_block *top_bb, gimple *use_stmt)
> {
> basic_block use_bb = gimple_bb (use_stmt);
> @@ -1126,6 +1130,103 @@ maybe_record_sincos (vec<gimple *> *stmts,
> return true;
> }
>
> +/* If NAME is the result of a type conversion, look for other
> + conversions from the same source to the same type and CSE them.
> + Replace results of conversions in sin, cos and cexpi calls with the
> + CSEd SSA_NAME. Return a SSA_NAME that holds the result of the
> + conversion to be used instead of NAME. */
> +
> +static tree
> +execute_cse_conv_1 (tree name)
> +{
> + if (SSA_NAME_IS_DEFAULT_DEF (name))
> + return name;
> +
> + gimple *def_stmt = SSA_NAME_DEF_STMT (name);
> +
> + if (!gimple_assign_cast_p (def_stmt))
> + return name;
> +
> + tree src = gimple_assign_rhs1 (def_stmt);
For trapping math SRC may be a constant? Better be safe
and guard against TREE_CODE (src) != SSA_NAME.
You also want to guard against SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src)
since you cannot generally propagate or move uses of those. That
applies to 'name' as well, and ...
> + int count = 0;
> + imm_use_iterator use_iter;
> + gimple *use_stmt;
> + auto_vec<gimple *> stmts;
> + basic_block top_bb = NULL;
> +
> + /* Record equivalent conversions from the same source. */
> + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
> + {
> + if (!gimple_assign_cast_p (use_stmt))
> + continue;
... the lhs of the other conversions.
> + if (!types_compatible_p (TREE_TYPE (name),
> + TREE_TYPE (gimple_assign_lhs (use_stmt))))
> + continue;
> +
> + gcc_checking_assert (gimple_assign_rhs1 (use_stmt)
> + == gimple_assign_rhs1 (def_stmt));
> +
> + if (maybe_record_stmt (&stmts, &top_bb, use_stmt))
> + count++;
> + }
> +
> + if (count <= 1)
> + return name;
> +
> + /* Build and insert a new conversion. */
> + name = make_temp_ssa_name (TREE_TYPE (name), def_stmt, "convtmp");
> + gimple *stmt = gimple_build_assign (name,
> + gimple_assign_rhs_code (def_stmt),
> + gimple_assign_rhs1 (def_stmt));
> +
> + gimple_stmt_iterator gsi;
> + if (!SSA_NAME_IS_DEFAULT_DEF (name)
> + && gimple_code (def_stmt) != GIMPLE_PHI
> + && gimple_bb (def_stmt) == top_bb)
> + gsi = gsi_for_stmt (def_stmt);
So I think this will go wrong if def_stmt is in a BB like
def1 = (T) src;
def2 = (T) src;
and def_stmt is def2. I think you need to look at the
definition of SRC instead, so
if (!SSA_NAME_IS_DEFAULT_DEF (src)
&& gimple_code (SSA_NAME_DEF_STMT (src)) != GIMPLE_PHI
&& gimple_bb (SSA_NAME_DEF_STMT (src)) == top_bb)
{
gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (src);
gsi_insert_after (...);
> + else
> + gsi = gsi_after_labels (top_bb);
> + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
> + update_stmt (stmt);
> + sincos_stats.conv_inserted++;
> +
> + /* And adjust the recorded old conversion sites. */
> + for (int i = 0; stmts.iterate (i, &use_stmt); ++i)
> + {
> + tree lhs = gimple_assign_lhs (use_stmt);
> + gimple_assign_set_rhs1 (use_stmt, name);
> + gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
> + update_stmt (use_stmt);
> +
> + /* Replace uses of LHS with NAME in sin, cos and cexpi calls
> + right away, so that we can recognize them as taking the same
> + arguments. */
> + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
> + {
> + if (gimple_code (use_stmt) != GIMPLE_CALL
> + || !gimple_call_lhs (use_stmt))
> + continue;
Any reason you are not replacing all uses via replace_uses_by
and removing the old conversion stmts? OK, removing might
wreck the upthread iterator so replacing with GIMPLE_NOP
is the usual trick then.
> + switch (gimple_call_combined_fn (use_stmt))
> + {
> + CASE_CFN_COS:
> + CASE_CFN_SIN:
> + CASE_CFN_CEXPI:
> + gcc_checking_assert (gimple_call_arg (use_stmt, 0) == lhs);
> + gimple_call_set_arg (use_stmt, 0, name);
> + update_stmt (use_stmt);
> + break;
> +
> + default:
> + continue;
> + }
> + }
> + }
> +
> + return name;
> +}
> +
> /* Look for sin, cos and cexpi calls with the same argument NAME and
> create a single call to cexpi CSEing the result in this case.
> We first walk over all immediate uses of the argument collecting
> @@ -1147,6 +1248,8 @@ execute_cse_sincos_1 (tree name)
> int i;
> bool cfg_changed = false;
>
> + name = execute_cse_conv_1 (name);
> +
> FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
> {
> if (gimple_code (use_stmt) != GIMPLE_CALL
> @@ -1156,15 +1259,15 @@ execute_cse_sincos_1 (tree name)
> switch (gimple_call_combined_fn (use_stmt))
> {
> CASE_CFN_COS:
> - seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
> + seen_cos |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
> break;
>
> CASE_CFN_SIN:
> - seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
> + seen_sin |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
> break;
>
> CASE_CFN_CEXPI:
> - seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 :
> 0;
> + seen_cexpi |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
> break;
>
> default:;
> @@ -1208,7 +1311,7 @@ execute_cse_sincos_1 (tree name)
> gsi = gsi_after_labels (top_bb);
> gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
> }
> - sincos_stats.inserted++;
> + sincos_stats.sincos_inserted++;
>
> /* And adjust the recorded old call sites. */
> for (i = 0; stmts.iterate (i, &use_stmt); ++i)
> @@ -2295,7 +2398,9 @@ pass_cse_sincos::execute (function *fun)
> }
>
> statistics_counter_event (fun, "sincos statements inserted",
> - sincos_stats.inserted);
> + sincos_stats.sincos_inserted);
> + statistics_counter_event (fun, "conv statements inserted",
> + sincos_stats.conv_inserted);
>
> return cfg_changed ? TODO_cleanup_cfg : 0;
> }
>
>
> --
> Alexandre Oliva, happy hacker
> https://FSFLA.org/blogs/lxo/
> Free Software Activist
> GNU Toolchain Engineer