The following fixes PR85597 in an easy backportable way, the proper fix involves refactoring of the vect_get_vec_defs interface.
Bootstrap and regtest running on x86_64-unknown-linux-gnu. Richard. 2018-05-02 Richard Biener <rguent...@suse.de> PR tree-optimization/85597 * tree-vect-stmts.c (vectorizable_operation): For ternary SLP do not use split vect_get_vec_defs call but call vect_get_slp_defs directly. * gcc.dg/vect/pr85597.c: New testcase. Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 259835) +++ gcc/tree-vect-stmts.c (working copy) @@ -5923,15 +5969,34 @@ vectorizable_operation (gimple *stmt, gi /* Handle uses. */ if (j == 0) { - if (op_type == binary_op || op_type == ternary_op) + if (op_type == binary_op) vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, slp_node); + else if (op_type == ternary_op) + { + if (slp_node) + { + auto_vec<tree> ops(3); + ops.quick_push (op0); + ops.quick_push (op1); + ops.quick_push (op2); + auto_vec<vec<tree> > vec_defs(3); + vect_get_slp_defs (ops, slp_node, &vec_defs); + vec_oprnds0 = vec_defs[0]; + vec_oprnds1 = vec_defs[1]; + vec_oprnds2 = vec_defs[2]; + } + else + { + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, + NULL); + vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL, + NULL); + } + } else vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, slp_node); - if (op_type == ternary_op) - vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL, - slp_node); } else { Index: gcc/testsuite/gcc.dg/vect/pr85597.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr85597.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/pr85597.c (working copy) @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */ + +extern double fma (double, double, double); + +static inline void +bar (int i, double *D, double *S) +{ + while (i-- > 0) + { + D[0] = fma (1, S[0], D[0]); + D[1] = fma (1, S[1], D[1]); + D[2] = fma (1, S[2], D[2]); + D[3] = fma (1, S[3], D[3]); + D += 4; + S += 4; + } +} + +void +foo (double *d, double *s) +{ + bar (10, d, s); +} +