On October 19, 2015 4:42:23 PM GMT+02:00, Richard Sandiford <richard.sandif...@arm.com> wrote: >The fold code also expanded cabs(x+yi) to fsqrt(x*x+y*y) when >optimising >for speed. tree-ssa-math-opts.c has this transformation too, but >unlike >the fold code, it first checks whether the target implements the sqrt >optab. The patch simply removes the fold code and keeps the >tree-ssa-math-opts.c logic the same. > >gcc.dg/lto/20110201-1_0.c was relying on us replacing cabs >with fsqrt even on targets where fsqrt is itself a library call. >The discussion leading up to that patch suggested that we only >want to test the fold on targets with a square root instruction, >so it would be OK to skip the test on other targets: > > https://gcc.gnu.org/ml/gcc-patches/2011-07/msg01961.html > https://gcc.gnu.org/ml/gcc-patches/2011-07/msg02036.html > >The patch does that using the sqrt_insn effective target. > >It's possible that removing the tree folds renders the LTO trick >unnecessary, but since the test was originally for an ICE, it seems >better to leave it as-is. > >Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi. >20110201-1_0.c passes on all three. OK to install?
OK. Thanks, Richard. >Thanks, >Richard > > >gcc/ > * builtins.c (fold_builtin_cabs): Delete. > (fold_builtin_1): Update accordingly. Handle constant arguments here. > * match.pd: Add rules previously handled by fold_builtin_cabs. > >gcc/testsuite/ > * gcc.dg/lto/20110201-1_0.c: Restrict to sqrt_insn targets. > Add associated options for arm*-*-*. > (sqrt): Remove dummy definition. > >diff --git a/gcc/builtins.c b/gcc/builtins.c >index 1e4ec35..8f87fd9 100644 >--- a/gcc/builtins.c >+++ b/gcc/builtins.c >@@ -7539,82 +7539,6 @@ fold_fixed_mathfn (location_t loc, tree fndecl, >tree arg) > return NULL_TREE; > } > >-/* Fold call to builtin cabs, cabsf or cabsl with argument ARG. TYPE >is the >- return type. Return NULL_TREE if no simplification can be made. >*/ >- >-static tree >-fold_builtin_cabs (location_t loc, tree arg, tree type, tree fndecl) >-{ >- tree res; >- >- if (!validate_arg (arg, COMPLEX_TYPE) >- || TREE_CODE (TREE_TYPE (TREE_TYPE (arg))) != REAL_TYPE) >- return NULL_TREE; >- >- /* Calculate the result when the argument is a constant. */ >- if (TREE_CODE (arg) == COMPLEX_CST >- && (res = do_mpfr_arg2 (TREE_REALPART (arg), TREE_IMAGPART >(arg), >- type, mpfr_hypot))) >- return res; >- >- if (TREE_CODE (arg) == COMPLEX_EXPR) >- { >- tree real = TREE_OPERAND (arg, 0); >- tree imag = TREE_OPERAND (arg, 1); >- >- /* If either part is zero, cabs is fabs of the other. */ >- if (real_zerop (real)) >- return fold_build1_loc (loc, ABS_EXPR, type, imag); >- if (real_zerop (imag)) >- return fold_build1_loc (loc, ABS_EXPR, type, real); >- >- /* cabs(x+xi) -> fabs(x)*sqrt(2). */ >- if (flag_unsafe_math_optimizations >- && operand_equal_p (real, imag, OEP_PURE_SAME)) >- { >- STRIP_NOPS (real); >- return fold_build2_loc (loc, MULT_EXPR, type, >- fold_build1_loc (loc, ABS_EXPR, type, real), >- build_real_truncate (type, dconst_sqrt2 ())); >- } >- } >- >- /* Optimize cabs(-z) and cabs(conj(z)) as cabs(z). 
*/ >- if (TREE_CODE (arg) == NEGATE_EXPR >- || TREE_CODE (arg) == CONJ_EXPR) >- return build_call_expr_loc (loc, fndecl, 1, TREE_OPERAND (arg, >0)); >- >- /* Don't do this when optimizing for size. */ >- if (flag_unsafe_math_optimizations >- && optimize && optimize_function_for_speed_p (cfun)) >- { >- tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); >- >- if (sqrtfn != NULL_TREE) >- { >- tree rpart, ipart, result; >- >- arg = builtin_save_expr (arg); >- >- rpart = fold_build1_loc (loc, REALPART_EXPR, type, arg); >- ipart = fold_build1_loc (loc, IMAGPART_EXPR, type, arg); >- >- rpart = builtin_save_expr (rpart); >- ipart = builtin_save_expr (ipart); >- >- result = fold_build2_loc (loc, PLUS_EXPR, type, >- fold_build2_loc (loc, MULT_EXPR, type, >- rpart, rpart), >- fold_build2_loc (loc, MULT_EXPR, type, >- ipart, ipart)); >- >- return build_call_expr_loc (loc, sqrtfn, 1, result); >- } >- } >- >- return NULL_TREE; >-} >- > /* Build a complex (inf +- 0i) for the result of cproj. TYPE is the > complex tree type of the result. If NEG is true, the imaginary > zero is negative. */ >@@ -9683,7 +9607,11 @@ fold_builtin_1 (location_t loc, tree fndecl, >tree arg0) > break; > > CASE_FLT_FN (BUILT_IN_CABS): >- return fold_builtin_cabs (loc, arg0, type, fndecl); >+ if (TREE_CODE (arg0) == COMPLEX_CST >+ && TREE_CODE (TREE_TYPE (TREE_TYPE (arg0))) == REAL_TYPE) >+ return do_mpfr_arg2 (TREE_REALPART (arg0), TREE_IMAGPART >(arg0), >+ type, mpfr_hypot); >+ break; > > CASE_FLT_FN (BUILT_IN_CARG): > return fold_builtin_carg (loc, arg0, type); >diff --git a/gcc/match.pd b/gcc/match.pd >index d677e69..55687c3 100644 >--- a/gcc/match.pd >+++ b/gcc/match.pd >@@ -67,6 +67,7 @@ along with GCC; see the file COPYING3. 
If not see > (define_operator_list COPYSIGN BUILT_IN_COPYSIGNF > BUILT_IN_COPYSIGN > BUILT_IN_COPYSIGNL) >+(define_operator_list CABS BUILT_IN_CABSF BUILT_IN_CABS >BUILT_IN_CABSL) > > /* Simplifications of operations with one constant operand and > simplifications to constants or single values. */ >@@ -392,6 +393,13 @@ along with GCC; see the file COPYING3. If not see > (ccoss (negate @0)) > (ccoss @0))) > >+/* cabs(-x) and cabs(conj(x)) -> cabs(x). */ >+(for ops (conj negate) >+ (for cabss (CABS) >+ (simplify >+ (cabss (ops @0)) >+ (cabss @0)))) >+ > /* X % Y is smaller than Y. */ > (for cmp (lt ge) > (simplify >@@ -2336,6 +2344,11 @@ along with GCC; see the file COPYING3. If not >see > (cbrts (exps @0)) > (exps (mult @0 { build_real_truncate (type, dconst_third ()); }))))) > >+/* cabs(x+0i) or cabs(0+xi) -> abs(x). */ >+(simplify >+ (CABS (complex:c @0 real_zerop@1)) >+ (abs @0)) >+ >/* Canonicalization of sequences of math builtins. These rules >represent > IL simplifications but are not necessarily optimizations. > >@@ -2427,7 +2440,12 @@ along with GCC; see the file COPYING3. If not >see > /* cbrt(pow(x,y)) -> pow(x,y/3), iff x is nonnegative. */ > (simplify > (cbrts (pows tree_expr_nonnegative_p@0 @1)) >- (pows @0 (mult @1 { build_real_truncate (type, dconst_third ()); >}))))) >+ (pows @0 (mult @1 { build_real_truncate (type, dconst_third ()); >})))) >+ >+ /* cabs(x+xi) -> fabs(x)*sqrt(2). */ >+ (simplify >+ (CABS (complex @0 @0)) >+ (mult (abs @0) { build_real_truncate (type, dconst_sqrt2 ()); }))) > > /* Narrowing of arithmetic and logical operations. 
> >diff --git a/gcc/testsuite/gcc.dg/lto/20110201-1_0.c >b/gcc/testsuite/gcc.dg/lto/20110201-1_0.c >index 5073a50..068dddc 100644 >--- a/gcc/testsuite/gcc.dg/lto/20110201-1_0.c >+++ b/gcc/testsuite/gcc.dg/lto/20110201-1_0.c >@@ -1,6 +1,8 @@ > /* { dg-lto-do run } */ > /* { dg-lto-options { { -O0 -flto } } } */ >+/* { dg-lto-options { "-O0 -flto -mfloat-abi=softfp -mfpu=neon-vfpv4" >} { target arm*-*-* } } */ > /* { dg-require-linker-plugin "" } */ >+/* { dg-require-effective-target sqrt_insn } */ > > /* We require a linker plugin because otherwise we'd need to link > against libm which we are not sure here has cabs on all targets. >@@ -16,13 +18,4 @@ foo (_Complex double x, int b) > return cabs(x); > } > >-/* We provide a dummy sqrt to avoid link failures on targets that do >not >- expand sqrt inline. Note that we do not link against libm in order >- to ensure cabs is not satisfied by the library, but must be folded. > */ >-double __attribute__((used)) >-sqrt (double x) >-{ >- return x; >-} >- > int main() { return 0; }