On Wed, Apr 06, 2016 at 02:54:31PM +0200, Jakub Jelinek wrote: > Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. > I'll try to coordinate with Intel about 3) as well as the default alignment > if aligned clause is used on declare simd without any explicit alignment.
I had missed in the ABI spec that, unlike the SSE2/AVX/AVX2 entrypoints, the AVX512F entrypoints in the masked case (i.e. _ZGVeM*) need to pass the mask in integer registers rather than in vectors of the characteristic type, either in unsigned int or unsigned long long (the latter only for a QImode characteristic type), and sometimes in more than one of these (the rule is that there should be as many mask parameters as there are vectors of the characteristic type). This doesn't generate perfect code right now, e.g. even for a simple #pragma omp declare simd int bar (int a, int b) { return a + b; } we generate for _ZGVeM16*: leaq 8(%rsp), %r10 andq $-64, %rsp vpbroadcastd %edi, %zmm2 vpaddd %zmm1, %zmm0, %zmm0 vpxord %zmm1, %zmm1, %zmm1 pushq -8(%r10) pushq %rbp movq %rsp, %rbp pushq %r10 subq $112, %rsp vpsrlvd .LC0(%rip), %zmm2, %zmm2 vpandd .LC1(%rip), %zmm2, %zmm2 vpcmpd $4, %zmm1, %zmm2, %k1 kmovw %k1, %eax testw %ax, %ax je .L65 vmovdqa32 %zmm0, -112(%rbp){%k1} vmovdqa64 -112(%rbp), %zmm0 addq $112, %rsp popq %r10 popq %rbp leaq -8(%r10), %rsp where it really should do: kmovw %edi, %k1 vpaddd %zmm1, %zmm0, %zmm0{z}{%k1} or so, but perhaps we should recognize in the vectorizer vect_cst__50 = {mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D)}; vect__8.627_51 = vect_cst__50 >> { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; vect__9.628_53 = vect__8.627_51 & { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; mask__36.631_57 = vect__9.628_53 != { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for VECTOR_BOOLEAN_P mask__36.631 as mask__36.631_57 = VIEW_CONVERT_EXPR <mask.321_7(D)>; and eventually handle even more complex cases.
That said, we still don't use the masked clones in the vectorizer (I thought it went in, but apparently it didn't; I will need to look for the discussions), so it is not a top priority right now; what's important is just to get the ABI right. Thus, I've committed the following fix after bootstrapping/regtesting on x86_64-linux and i686-linux: 2016-04-07 Jakub Jelinek <ja...@redhat.com> * cgraph.h (struct cgraph_simd_clone): Add mask_mode field. * omp-low.c (simd_clone_init_simd_arrays, simd_clone_adjust): Handle node->simdclone->mask_mode != VOIDmode masks. (simd_clone_adjust_argument_types): Likewise. Move sc var definition earlier, use it instead of node->simdclone. * config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen): Set clonei->mask_mode. * c-c++-common/attr-simd.c: Add scan-assembler* directives for AVX512F clones. * c-c++-common/attr-simd-2.c: Likewise. * c-c++-common/attr-simd-4.c: Likewise. * gcc.dg/gomp/simd-clones-2.c: Likewise. * gcc.dg/gomp/simd-clones-3.c: Likewise. --- gcc/cgraph.h.jj 2016-04-04 12:28:41.000000000 +0200 +++ gcc/cgraph.h 2016-04-07 10:56:36.534410726 +0200 @@ -766,6 +766,11 @@ struct GTY(()) cgraph_simd_clone { /* Max hardware vector size in bits for floating point vectors. */ unsigned int vecsize_float; + /* Machine mode of the mask argument(s), if they are to be passed + as bitmasks in integer argument(s). VOIDmode if masks are passed + as vectors of characteristic type. */ + machine_mode mask_mode; + /* The mangling character for a given vector size. This is used to determine the ISA mangling bit as specified in the Intel Vector ABI.
*/ --- gcc/omp-low.c.jj 2016-04-06 14:40:57.000000000 +0200 +++ gcc/omp-low.c 2016-04-07 21:32:47.633630411 +0200 @@ -18916,7 +18916,9 @@ simd_clone_adjust_argument_types (struct adjustments.create (args.length ()); unsigned i, j, veclen; struct ipa_parm_adjustment adj; - for (i = 0; i < node->simdclone->nargs; ++i) + struct cgraph_simd_clone *sc = node->simdclone; + + for (i = 0; i < sc->nargs; ++i) { memset (&adj, 0, sizeof (adj)); tree parm = args[i]; @@ -18924,10 +18926,10 @@ simd_clone_adjust_argument_types (struct adj.base_index = i; adj.base = parm; - node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE; - node->simdclone->args[i].orig_type = parm_type; + sc->args[i].orig_arg = node->definition ? parm : NULL_TREE; + sc->args[i].orig_type = parm_type; - switch (node->simdclone->args[i].arg_type) + switch (sc->args[i].arg_type) { default: /* No adjustment necessary for scalar arguments. */ @@ -18936,29 +18938,29 @@ simd_clone_adjust_argument_types (struct case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: if (node->definition) - node->simdclone->args[i].simd_array + sc->args[i].simd_array = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)), TREE_TYPE (parm_type), - node->simdclone->simdlen); + sc->simdlen); adj.op = IPA_PARM_OP_COPY; break; case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: case SIMD_CLONE_ARG_TYPE_VECTOR: if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type)) - veclen = node->simdclone->vecsize_int; + veclen = sc->vecsize_int; else - veclen = node->simdclone->vecsize_float; + veclen = sc->vecsize_float; veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type)); - if (veclen > node->simdclone->simdlen) - veclen = node->simdclone->simdlen; + if (veclen > sc->simdlen) + veclen = sc->simdlen; adj.arg_prefix = "simd"; if (POINTER_TYPE_P (parm_type)) adj.type = build_vector_type (pointer_sized_int_node, veclen); 
else adj.type = build_vector_type (parm_type, veclen); - node->simdclone->args[i].vector_type = adj.type; - for (j = veclen; j < node->simdclone->simdlen; j += veclen) + sc->args[i].vector_type = adj.type; + for (j = veclen; j < sc->simdlen; j += veclen) { adjustments.safe_push (adj); if (j == veclen) @@ -18967,23 +18969,21 @@ simd_clone_adjust_argument_types (struct adj.op = IPA_PARM_OP_NEW; adj.arg_prefix = "simd"; adj.base_index = i; - adj.type = node->simdclone->args[i].vector_type; + adj.type = sc->args[i].vector_type; } } if (node->definition) - node->simdclone->args[i].simd_array + sc->args[i].simd_array = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)), - parm_type, node->simdclone->simdlen); + parm_type, sc->simdlen); } adjustments.safe_push (adj); } - if (node->simdclone->inbranch) + if (sc->inbranch) { - tree base_type - = simd_clone_compute_base_data_type (node->simdclone->origin, - node->simdclone); + tree base_type = simd_clone_compute_base_data_type (sc->origin, sc); memset (&adj, 0, sizeof (adj)); adj.op = IPA_PARM_OP_NEW; @@ -18991,31 +18991,41 @@ simd_clone_adjust_argument_types (struct adj.base_index = i; if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type)) - veclen = node->simdclone->vecsize_int; + veclen = sc->vecsize_int; else - veclen = node->simdclone->vecsize_float; + veclen = sc->vecsize_float; veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type)); - if (veclen > node->simdclone->simdlen) - veclen = node->simdclone->simdlen; - if (POINTER_TYPE_P (base_type)) + if (veclen > sc->simdlen) + veclen = sc->simdlen; + if (sc->mask_mode != VOIDmode) + adj.type + = lang_hooks.types.type_for_mode (sc->mask_mode, 1); + else if (POINTER_TYPE_P (base_type)) adj.type = build_vector_type (pointer_sized_int_node, veclen); else adj.type = build_vector_type (base_type, veclen); adjustments.safe_push (adj); - for (j = veclen; j < node->simdclone->simdlen; j += veclen) + for (j = veclen; j < sc->simdlen; j += veclen) adjustments.safe_push 
(adj); /* We have previously allocated one extra entry for the mask. Use it and fill it. */ - struct cgraph_simd_clone *sc = node->simdclone; sc->nargs++; + if (sc->mask_mode != VOIDmode) + base_type = boolean_type_node; if (node->definition) { sc->args[i].orig_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type); - sc->args[i].simd_array - = create_tmp_simd_array ("mask", base_type, sc->simdlen); + if (sc->mask_mode == VOIDmode) + sc->args[i].simd_array + = create_tmp_simd_array ("mask", base_type, sc->simdlen); + else if (veclen < sc->simdlen) + sc->args[i].simd_array + = create_tmp_simd_array ("mask", adj.type, sc->simdlen / veclen); + else + sc->args[i].simd_array = NULL_TREE; } sc->args[i].orig_type = base_type; sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; @@ -19083,6 +19093,27 @@ simd_clone_init_simd_arrays (struct cgra node->simdclone->args[i].vector_arg = arg; tree array = node->simdclone->args[i].simd_array; + if (node->simdclone->mask_mode != VOIDmode + && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK) + { + if (array == NULL_TREE) + continue; + unsigned int l + = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array)))); + for (k = 0; k <= l; k++) + { + if (k) + { + arg = DECL_CHAIN (arg); + j++; + } + tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)), + array, size_int (k), NULL, NULL); + t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); + gimplify_and_add (t, &seq); + } + continue; + } if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen) { tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); @@ -19453,7 +19484,7 @@ simd_clone_adjust (struct cgraph_node *n e->probability = REG_BR_PROB_BASE; gsi = gsi_last_bb (incr_bb); gimple *g = gimple_build_assign (iter2, PLUS_EXPR, iter1, - build_int_cst (unsigned_type_node, 1)); + build_int_cst (unsigned_type_node, 1)); gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); /* Mostly annotate the loop for the vectorizer (the rest is done below). 
*/ @@ -19469,21 +19500,68 @@ simd_clone_adjust (struct cgraph_node *n gimple_stmt_iterator gsi = gsi_last_bb (loop->header); tree mask_array = node->simdclone->args[node->simdclone->nargs - 1].simd_array; - tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array))); - tree aref = build4 (ARRAY_REF, - TREE_TYPE (TREE_TYPE (mask_array)), - mask_array, iter1, - NULL, NULL); - g = gimple_build_assign (mask, aref); - gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); - int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref))); - if (!INTEGRAL_TYPE_P (TREE_TYPE (aref))) - { - aref = build1 (VIEW_CONVERT_EXPR, - build_nonstandard_integer_type (bitsize, 0), mask); - mask = make_ssa_name (TREE_TYPE (aref)); + tree mask; + if (node->simdclone->mask_mode != VOIDmode) + { + tree shift_cnt; + if (mask_array == NULL_TREE) + { + tree arg = node->simdclone->args[node->simdclone->nargs + - 1].vector_arg; + mask = get_or_create_ssa_default_def (cfun, arg); + shift_cnt = iter1; + } + else + { + tree maskt = TREE_TYPE (mask_array); + int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt))); + c = node->simdclone->simdlen / (c + 1); + int s = exact_log2 (c); + gcc_assert (s > 0); + c--; + tree idx = make_ssa_name (TREE_TYPE (iter1)); + g = gimple_build_assign (idx, RSHIFT_EXPR, iter1, + build_int_cst (NULL_TREE, s)); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array))); + tree aref = build4 (ARRAY_REF, + TREE_TYPE (TREE_TYPE (mask_array)), + mask_array, idx, NULL, NULL); + g = gimple_build_assign (mask, aref); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + shift_cnt = make_ssa_name (TREE_TYPE (iter1)); + g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1, + build_int_cst (TREE_TYPE (iter1), c)); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + } + g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)), + RSHIFT_EXPR, mask, shift_cnt); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + mask = 
gimple_assign_lhs (g); + g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)), + BIT_AND_EXPR, mask, + build_int_cst (TREE_TYPE (mask), 1)); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + mask = gimple_assign_lhs (g); + } + else + { + mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array))); + tree aref = build4 (ARRAY_REF, + TREE_TYPE (TREE_TYPE (mask_array)), + mask_array, iter1, NULL, NULL); g = gimple_build_assign (mask, aref); gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref))); + if (!INTEGRAL_TYPE_P (TREE_TYPE (aref))) + { + aref = build1 (VIEW_CONVERT_EXPR, + build_nonstandard_integer_type (bitsize, 0), + mask); + mask = make_ssa_name (TREE_TYPE (aref)); + g = gimple_build_assign (mask, aref); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + } } g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)), --- gcc/config/i386/i386.c.jj 2016-04-06 14:46:29.000000000 +0200 +++ gcc/config/i386/i386.c 2016-04-07 11:30:26.988067880 +0200 @@ -53747,7 +53747,7 @@ ix86_memmodel_check (unsigned HOST_WIDE_ return val; } -/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int, +/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int, CLONEI->vecsize_float and if CLONEI->simdlen is 0, also CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, or number of vecsize_mangle variants that should be emitted. 
*/ @@ -53834,6 +53834,7 @@ ix86_simd_clone_compute_vecsize_and_simd clonei->vecsize_mangle = "bcde"[num]; ret = 4; } + clonei->mask_mode = VOIDmode; switch (clonei->vecsize_mangle) { case 'b': @@ -53851,6 +53852,10 @@ ix86_simd_clone_compute_vecsize_and_simd case 'e': clonei->vecsize_int = 512; clonei->vecsize_float = 512; + if (TYPE_MODE (base_type) == QImode) + clonei->mask_mode = DImode; + else + clonei->mask_mode = SImode; break; } if (clonei->simdlen == 0) --- gcc/testsuite/c-c++-common/attr-simd.c.jj 2015-11-20 08:17:52.000000000 +0100 +++ gcc/testsuite/c-c++-common/attr-simd.c 2016-04-07 21:34:35.796149182 +0200 @@ -18,6 +18,8 @@ int simd_attr (void) /* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ extern #ifdef __cplusplus @@ -36,3 +38,5 @@ int simd_attr2 (void) /* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ --- gcc/testsuite/c-c++-common/attr-simd-2.c.jj 2015-11-18 11:19:20.000000000 +0100 +++ gcc/testsuite/c-c++-common/attr-simd-2.c 2016-04-07 21:34:54.529892634 +0200 @@ -19,3 +19,5 @@ int simd_attr (void) /* { dg-final { 
scan-assembler-times "_ZGVcM4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ --- gcc/testsuite/c-c++-common/attr-simd-4.c.jj 2015-12-07 12:17:55.000000000 +0100 +++ gcc/testsuite/c-c++-common/attr-simd-4.c 2016-04-07 21:36:30.975570536 +0200 @@ -15,9 +15,11 @@ int simd_attr (void) /* { dg-final { scan-assembler-times "_ZGVbN4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcN4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-not "_ZGVbM4_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-not "_ZGVcM4_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-not "_ZGVdM8_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-not "_ZGVeM16_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */ extern #ifdef __cplusplus @@ -33,6 +35,8 @@ int simd_attr2 (void) /* { dg-final { scan-assembler-not "_ZGVbN4_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-not "_ZGVcN4_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-not "_ZGVdN8_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-not "_ZGVeN16_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { 
scan-assembler-times "_ZGVbM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ --- gcc/testsuite/gcc.dg/gomp/simd-clones-2.c.jj 2015-05-29 15:03:14.000000000 +0200 +++ gcc/testsuite/gcc.dg/gomp/simd-clones-2.c 2016-04-07 21:37:27.034801725 +0200 @@ -23,3 +23,6 @@ float setArray(float *a, float x, int k) /* { dg-final { scan-tree-dump "_ZGVdN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVdN8vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVdM8vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVeN16ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVeN16vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVeM16vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ --- gcc/testsuite/gcc.dg/gomp/simd-clones-3.c.jj 2015-05-29 15:03:14.000000000 +0200 +++ gcc/testsuite/gcc.dg/gomp/simd-clones-3.c 2016-04-07 21:38:03.712298720 +0200 @@ -15,3 +15,5 @@ int addit(int a, int b, int c) /* { dg-final { scan-tree-dump "_ZGVcM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVdN8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVdM8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVeN16vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVeM16vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ Jakub