Summary: Add a new fast_mul_optab to define a pattern corresponding to the fast path of an IEEE-compliant multiplication. This lets the backend programmer change the fast path without having to handle the IEEE checks manually.
gcc/ChangeLog: * internal-fn.def: Add a FAST_MULT internal fn * optabs.def: Add fast_mult_optab * tree-complex.cc (expand_complex_multiplication_components): Adapt complex multiplication expand to generate FAST_MULT internal fn (expand_complex_multiplication): Likewise (expand_complex_operations_1): Likewise --- gcc/internal-fn.def | 1 + gcc/optabs.def | 1 + gcc/tree-complex.cc | 70 +++++++++++++++++++++++++++++---------------- 3 files changed, 47 insertions(+), 25 deletions(-) diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 0ac6cd98a4f..f1046996a48 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -396,6 +396,7 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary) +DEF_INTERNAL_OPTAB_FN (FAST_MULT, ECF_CONST, fast_mul, binary) DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary) DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_PLUS, ECF_CONST | ECF_NOTHROW, diff --git a/gcc/optabs.def b/gcc/optabs.def index d146cac5eec..a90b6ee6440 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -344,6 +344,7 @@ OPTAB_D (cmla_optab, "cmla$a4") OPTAB_D (cmla_conj_optab, "cmla_conj$a4") OPTAB_D (cmls_optab, "cmls$a4") OPTAB_D (cmls_conj_optab, "cmls_conj$a4") +OPTAB_D (fast_mul_optab, "fast_mul$a3") OPTAB_D (cos_optab, "cos$a2") OPTAB_D (cosh_optab, "cosh$a2") OPTAB_D (exp10_optab, "exp10$a2") diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc index d814e407af6..16759f1f3ba 100644 --- a/gcc/tree-complex.cc +++ b/gcc/tree-complex.cc @@ -1138,25 +1138,36 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree type, tree ar, tree ai, static void expand_complex_multiplication_components (gimple_seq *stmts, location_t loc, - tree type, tree ar, tree ai, - tree br, tree bi, - tree *rr, tree *ri) + tree type, tree ac, tree ar, + 
tree ai, tree bc, tree br, tree bi, + tree *rr, tree *ri, + bool fast_mult) { - tree t1, t2, t3, t4; + tree inner_type = TREE_TYPE (type); + if (!fast_mult) + { + tree t1, t2, t3, t4; - t1 = gimple_build (stmts, loc, MULT_EXPR, type, ar, br); - t2 = gimple_build (stmts, loc, MULT_EXPR, type, ai, bi); - t3 = gimple_build (stmts, loc, MULT_EXPR, type, ar, bi); + t1 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ar, br); + t2 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ai, bi); + t3 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ar, bi); - /* Avoid expanding redundant multiplication for the common - case of squaring a complex number. */ - if (ar == br && ai == bi) - t4 = t3; - else - t4 = gimple_build (stmts, loc, MULT_EXPR, type, ai, br); + /* Avoid expanding redundant multiplication for the common + case of squaring a complex number. */ + if (ar == br && ai == bi) + t4 = t3; + else + t4 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ai, br); - *rr = gimple_build (stmts, loc, MINUS_EXPR, type, t1, t2); - *ri = gimple_build (stmts, loc, PLUS_EXPR, type, t3, t4); + *rr = gimple_build (stmts, loc, MINUS_EXPR, inner_type, t1, t2); + *ri = gimple_build (stmts, loc, PLUS_EXPR, inner_type, t3, t4); + } + else + { + tree rc = gimple_build (stmts, loc, CFN_FAST_MULT, type, ac, bc); + *rr = gimple_build (stmts, loc, REALPART_EXPR, inner_type, rc); + *ri = gimple_build (stmts, loc, IMAGPART_EXPR, inner_type, rc); + } } /* Expand complex multiplication to scalars: @@ -1165,13 +1176,18 @@ expand_complex_multiplication_components (gimple_seq *stmts, location_t loc, static void expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type, - tree ar, tree ai, tree br, tree bi, + tree ac, tree ar, tree ai, + tree bc, tree br, tree bi, complex_lattice_t al, complex_lattice_t bl) { tree rr, ri; tree inner_type = TREE_TYPE (type); location_t loc = gimple_location (gsi_stmt (*gsi)); gimple_seq stmts = NULL; + bool fast_mult = direct_internal_fn_supported_p 
(IFN_FAST_MULT, type, + bb_optimization_type + (gimple_bb + (gsi_stmt (*gsi)))); if (al < bl) { @@ -1232,9 +1248,10 @@ expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type, { /* If we are not worrying about NaNs expand to (ar*br - ai*bi) + i(ar*bi + br*ai) directly. */ - expand_complex_multiplication_components (&stmts, loc, inner_type, - ar, ai, br, bi, - &rr, &ri); + expand_complex_multiplication_components (&stmts, loc, type, + ac, ar, ai, bc, br, + bi, &rr, &ri, + fast_mult); break; } @@ -1245,8 +1262,9 @@ expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type, tree tmpr, tmpi; expand_complex_multiplication_components (&stmts, loc, - inner_type, ar, ai, - br, bi, &tmpr, &tmpi); + type, ac, ar, ai, + bc, br, bi, &tmpr, &tmpi, + fast_mult); gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); stmts = NULL; @@ -1297,10 +1315,11 @@ expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type, } else /* If we are not worrying about NaNs expand to - (ar*br - ai*bi) + i(ar*bi + br*ai) directly. */ + (ar*br - ai*bi) + i(ar*bi + br*ai) directly. */ expand_complex_multiplication_components (&stmts, loc, - inner_type, ar, ai, - br, bi, &rr, &ri); + type, ac, ar, ai, + bc, br, bi, &rr, &ri, + fast_mult); break; default: @@ -2096,7 +2115,8 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi) break; case MULT_EXPR: - expand_complex_multiplication (gsi, type, ar, ai, br, bi, al, bl); + expand_complex_multiplication (gsi, type, ac, ar, ai, bc, br, bi, al, + bl); break; case TRUNC_DIV_EXPR: -- 2.17.1