On Wed, Dec 10, 2014 at 5:20 PM, Ilya Tocar <tocarip.in...@gmail.com> wrote: > Hi, > > Patch below adds march/mtune/attribute=knl. > For now this is just silvermont tuning and avx/avx2/avx512 support. > Ok for trunk? > > gcc/ > * config.gcc: Support "knl". > * config/i386/driver-i386.c (host_detect_local_cpu): Detect "knl". > * config/i386/i386-c.c (ix86_target_macros_internal): Handle > PROCESSOR_KNL. > * config/i386/i386.c (m_KNL): Define. > (processor_target_table): Add "knl". > (PTA_KNL): Define. > (ix86_issue_rate): Add PROCESSOR_KNL. > (ix86_adjust_cost): Ditto. > (ia32_multipass_dfa_lookahead): Ditto. > (get_builtin_code_for_version): Handle "knl". > (fold_builtin_cpu): Ditto. > * config/i386/i386.h (TARGET_KNL): Define. > (processor_type): Add PROCESSOR_KNL. > * config/i386/i386.md (attr "cpu"): Add knl. > * config/i386/x86-tune.def: Add m_KNL. > > gcc/testsuite/ > * gcc.target/i386/funcspec-5.c: Test avx512f and knl.
OK with a small comment nit below. Thanks, Uros. > > --- > gcc/config.gcc | 3 +- > gcc/config/i386/driver-i386.c | 6 +++- > gcc/config/i386/i386-c.c | 7 +++++ > gcc/config/i386/i386.c | 17 ++++++++++- > gcc/config/i386/i386.h | 2 ++ > gcc/config/i386/i386.md | 2 +- > gcc/config/i386/x86-tune.def | 47 > +++++++++++++++--------------- > gcc/testsuite/gcc.target/i386/funcspec-5.c | 3 ++ > 8 files changed, 60 insertions(+), 27 deletions(-) > > diff --git a/gcc/config.gcc b/gcc/config.gcc > index fa3e1fc..8541274 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -591,7 +591,8 @@ pentium4 pentium4m pentiumpro prescott" > x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ > bdver3 bdver4 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \ > core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \ > -sandybridge ivybridge haswell broadwell bonnell silvermont x86-64 native" > +sandybridge ivybridge haswell broadwell bonnell silvermont knl x86-64 \ > +native" > > # Additional x86 processors supported by --with-cpu=. Each processor > # MUST be separated by exactly one space. > diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c > index a2248ce..69ebebd 100644 > --- a/gcc/config/i386/driver-i386.c > +++ b/gcc/config/i386/driver-i386.c > @@ -747,7 +747,11 @@ const char *host_detect_local_cpu (int argc, const char > **argv) > if (arch) > { > /* This is unknown family 0x6 CPU. */ > - if (has_adx) > + /* Assume Knl. */ /* Assume Knights Landing. */ > + if (has_avx512f) > + cpu = "knl"; > + /* Assume Broadwell. */ > + else if (has_adx) > cpu = "broadwell"; > else if (has_avx2) > /* Assume Haswell. 
*/ > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > index 3ad7d49..1c604fc3 100644 > --- a/gcc/config/i386/i386-c.c > +++ b/gcc/config/i386/i386-c.c > @@ -171,6 +171,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > def_or_undef (parse_in, "__silvermont"); > def_or_undef (parse_in, "__silvermont__"); > break; > + case PROCESSOR_KNL: > + def_or_undef (parse_in, "__knl"); > + def_or_undef (parse_in, "__knl__"); > + break; > /* use PROCESSOR_max to not set/unset the arch macro. */ > case PROCESSOR_max: > break; > @@ -277,6 +281,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > def_or_undef (parse_in, "__tune_slm__"); > def_or_undef (parse_in, "__tune_silvermont__"); > break; > + case PROCESSOR_KNL: > + def_or_undef (parse_in, "__tune_knl__"); > + break; > case PROCESSOR_INTEL: > case PROCESSOR_GENERIC: > break; > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 1e1716e..f0cbe48 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -2040,6 +2040,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; > #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL) > #define m_BONNELL (1<<PROCESSOR_BONNELL) > #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT) > +#define m_KNL (1<<PROCESSOR_KNL) > #define m_INTEL (1<<PROCESSOR_INTEL) > > #define m_GEODE (1<<PROCESSOR_GEODE) > @@ -2505,6 +2506,7 @@ static const struct ptt > processor_target_table[PROCESSOR_max] = > {"haswell", &core_cost, 16, 10, 16, 10, 16}, > {"bonnell", &atom_cost, 16, 15, 16, 7, 16}, > {"silvermont", &slm_cost, 16, 15, 16, 7, 16}, > + {"knl", &slm_cost, 16, 15, 16, 7, 16}, > {"intel", &intel_cost, 16, 15, 16, 7, 16}, > {"geode", &geode_cost, 0, 0, 0, 0, 0}, > {"k6", &k6_cost, 32, 7, 32, 7, 32}, > @@ -3178,6 +3180,8 @@ ix86_option_override_internal (bool main_args_p, > | PTA_FMA | PTA_MOVBE | PTA_HLE) > #define PTA_BROADWELL \ > (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED) > +#define PTA_KNL \ > + 
(PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD) > #define PTA_BONNELL \ > (PTA_CORE2 | PTA_MOVBE) > #define PTA_SILVERMONT \ > @@ -3241,6 +3245,7 @@ ix86_option_override_internal (bool main_args_p, > {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, > {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, > {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, > + {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL}, > {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM}, > {"geode", PROCESSOR_GEODE, CPU_GEODE, > PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, > @@ -25934,6 +25939,7 @@ ix86_issue_rate (void) > case PROCESSOR_PENTIUM: > case PROCESSOR_BONNELL: > case PROCESSOR_SILVERMONT: > + case PROCESSOR_KNL: > case PROCESSOR_INTEL: > case PROCESSOR_K6: > case PROCESSOR_BTVER2: > @@ -26276,6 +26282,7 @@ ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn > *dep_insn, int cost) > break; > > case PROCESSOR_SILVERMONT: > + case PROCESSOR_KNL: > case PROCESSOR_INTEL: > if (!reload_completed) > return cost; > @@ -26345,6 +26352,7 @@ ia32_multipass_dfa_lookahead (void) > case PROCESSOR_HASWELL: > case PROCESSOR_BONNELL: > case PROCESSOR_SILVERMONT: > + case PROCESSOR_KNL: > case PROCESSOR_INTEL: > /* Generally, we want haifa-sched:max_issue() to look ahead as far > as many instructions can be executed on a cycle, i.e., > @@ -34246,7 +34254,8 @@ get_builtin_code_for_version (tree decl, tree > *predicate_list) > P_PROC_FMA, > P_AVX2, > P_PROC_AVX2, > - P_AVX512F > + P_AVX512F, > + P_PROC_AVX512F > }; > > enum feature_priority priority = P_ZERO; > @@ -34350,6 +34359,10 @@ get_builtin_code_for_version (tree decl, tree > *predicate_list) > arg_str = "bonnell"; > priority = P_PROC_SSSE3; > break; > + case PROCESSOR_KNL: > + arg_str = "knl"; > + priority = P_PROC_AVX512F; > + break; > case PROCESSOR_SILVERMONT: > arg_str = "silvermont"; > priority = P_PROC_SSE4_2; > @@ -35268,6 +35281,7 @@ fold_builtin_cpu (tree fndecl, tree *args) 
> M_AMDFAM10H, > M_AMDFAM15H, > M_INTEL_SILVERMONT, > + M_INTEL_KNL, > M_AMD_BTVER1, > M_AMD_BTVER2, > M_CPU_SUBTYPE_START, > @@ -35305,6 +35319,7 @@ fold_builtin_cpu (tree fndecl, tree *args) > {"haswell", M_INTEL_COREI7_HASWELL}, > {"bonnell", M_INTEL_BONNELL}, > {"silvermont", M_INTEL_SILVERMONT}, > + {"knl", M_INTEL_KNL}, > {"amdfam10h", M_AMDFAM10H}, > {"barcelona", M_AMDFAM10H_BARCELONA}, > {"shanghai", M_AMDFAM10H_SHANGHAI}, > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index df7789d..7c35758 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -337,6 +337,7 @@ extern const struct processor_costs ix86_size_cost; > #define TARGET_HASWELL (ix86_tune == PROCESSOR_HASWELL) > #define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL) > #define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT) > +#define TARGET_KNL (ix86_tune == PROCESSOR_KNL) > #define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL) > #define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC) > #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10) > @@ -2272,6 +2273,7 @@ enum processor_type > PROCESSOR_HASWELL, > PROCESSOR_BONNELL, > PROCESSOR_SILVERMONT, > + PROCESSOR_KNL, > PROCESSOR_INTEL, > PROCESSOR_GEODE, > PROCESSOR_K6, > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 9019ed8..7ae511c 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -399,7 +399,7 @@ > ;; Processor type. > (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem, > atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,bdver4, > - btver2" > + btver2,knl" > (const (symbol_ref "ix86_schedule"))) > > ;; A basic instruction type. Refinements due to arguments to be > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def > index b5c6e4f..db43b3d 100644 > --- a/gcc/config/i386/x86-tune.def > +++ b/gcc/config/i386/x86-tune.def > @@ -41,7 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
> If not, see > /* X86_TUNE_SCHEDULE: Enable scheduling. */ > DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", > m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL > - | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) > + | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) > > /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming > on modern chips. Preffer stores affecting whole integer register > @@ -49,7 +49,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", > value over movb. */ > DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", > m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL > - | m_AMD_MULTIPLE | m_GENERIC) > + | m_KNL | m_AMD_MULTIPLE | m_GENERIC) > > /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store > destinations to be 128bit to allow register renaming on 128bit SSE units, > @@ -85,13 +85,13 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, > "partial_flag_reg_stall", > partial dependencies. */ > DEF_TUNE (X86_TUNE_MOVX, "movx", > m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT > - | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC) > + | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC) > > /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by > full sized loads. */ > DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", > m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL > - | m_AMD_MULTIPLE | m_GENERIC) > + | m_KNL | m_AMD_MULTIPLE | m_GENERIC) > > /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent > conditional jump instruction for 32 bit TARGET. > @@ -125,7 +125,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, > "reassoc_int_to_parallel", > /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations > during reassociation of fp computation. 
*/ > DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel", > - m_BONNELL | m_SILVERMONT | m_HASWELL | m_INTEL | m_BDVER1 > + m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1 > | m_BDVER2 | m_GENERIC) > > > /*****************************************************************************/ > @@ -145,7 +145,7 @@ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, > "reassoc_fp_to_parallel", > regression on mgrid due to IRA limitation leading to unecessary > use of the frame pointer in 32bit mode. */ > DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", > - m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL > + m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL > | m_ATHLON_K8) > > /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are > @@ -205,7 +205,7 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", > /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more > than 4 branch instructions in the 16 byte window. */ > DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", > - m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | > + m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL |m_INTEL | > m_ATHLON_K8 | m_AMDFAM10) > > > /*****************************************************************************/ > @@ -229,21 +229,22 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT > | m_PPRO)) > /* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. 
*/ > DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", > ~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL > - | m_GENERIC)) > + | m_KNL | m_GENERIC)) > > /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred > for DFmode copies */ > DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", > ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT > - | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) > + | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) > > /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag > will impact LEA instruction selection. */ > -DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_INTEL) > +DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL > + | m_INTEL) > > /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */ > DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr", > - m_BONNELL | m_SILVERMONT) > + m_BONNELL | m_SILVERMONT | m_KNL) > > /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is > vector path on AMD machines. > @@ -260,7 +261,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8", > /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for > a conditional move. */ > DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", > - m_BONNELL | m_SILVERMONT | m_INTEL) > + m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL) > > /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such > as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. > */ > @@ -278,17 +279,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES, > /* X86_TUNE_USE_SAHF: Controls use of SAHF. 
*/ > DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", > m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT > - | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER > - | m_GENERIC) > + | m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER > + | m_BTVER | m_GENERIC) > > /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */ > DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", > - ~(m_PENT | m_BONNELL | m_SILVERMONT | m_INTEL | m_K6)) > + ~(m_PENT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL | m_K6)) > > /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */ > DEF_TUNE (X86_TUNE_USE_BT, "use_bt", > - m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | m_AMD_MULTIPLE > - | m_GENERIC) > + m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL > + | m_AMD_MULTIPLE | m_GENERIC) > > > /*****************************************************************************/ > /* 387 instruction selection tuning > */ > @@ -304,7 +305,7 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop", > integer operand. */ > DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", > ~(m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT > - | m_INTEL | m_AMD_MULTIPLE | m_GENERIC)) > + | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC)) > > /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */ > DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) > @@ -312,7 +313,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", > m_AMD_MULTIPLE) > /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. 
*/ > DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", > m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT > - | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC) > + | m_KNL | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC) > > > /*****************************************************************************/ > /* SSE instruction selection tuning > */ > @@ -331,13 +332,13 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, > "general_regs_sse_spill", > of a sequence loading registers by parts. */ > DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", > m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_AMDFAM10 | m_BDVER > - | m_BTVER | m_SILVERMONT | m_INTEL | m_GENERIC) > + | m_BTVER | m_SILVERMONT | m_KNL | m_INTEL | m_GENERIC) > > /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores > instead > of a sequence loading registers by parts. */ > DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, > "sse_unaligned_store_optimal", > m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_SILVERMONT > - | m_INTEL | m_GENERIC) > + | m_KNL | m_INTEL | m_GENERIC) > > /* Use packed single precision instructions where posisble. I.e. movups > instead > of movupd. */ > @@ -374,7 +375,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, > "inter_unit_conversions", > /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for > fp converts to destination register. */ > DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, > "split_mem_opnd_for_fp_converts", > - m_SILVERMONT | m_INTEL) > + m_SILVERMONT | m_KNL | m_INTEL) > > /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion > from FP to FP. This form of instructions avoids partial write to the > @@ -388,7 +389,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, > "use_vector_converts", m_AMDFAM10) > > /* X86_TUNE_SLOW_SHUFB: Indicates tunings with slow pshufb instruction. 
*/ > DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", > - m_BONNELL | m_SILVERMONT | m_INTEL) > + m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL) > > /* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to > execute 2 or more vector instructions in parallel. */ > diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c > b/gcc/testsuite/gcc.target/i386/funcspec-5.c > index 0acfe00..269e610 100644 > --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c > +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c > @@ -24,6 +24,7 @@ extern void test_ssse3 (void) > __attribute__((__target__("ssse3"))); > extern void test_tbm (void) > __attribute__((__target__("tbm"))); > extern void test_avx (void) > __attribute__((__target__("avx"))); > extern void test_avx2 (void) > __attribute__((__target__("avx2"))); > +extern void test_avx512 (void) > __attribute__((__target__("avx512"))); > > extern void test_no_abm (void) > __attribute__((__target__("no-abm"))); > extern void test_no_aes (void) > __attribute__((__target__("no-aes"))); > @@ -46,6 +47,7 @@ extern void test_no_ssse3 (void) > __attribute__((__target__("no-ssse3"))); > extern void test_no_tbm (void) > __attribute__((__target__("no-tbm"))); > extern void test_no_avx (void) > __attribute__((__target__("no-avx"))); > extern void test_no_avx2 (void) > __attribute__((__target__("no-avx2"))); > +extern void test_no_avx512 (void) > __attribute__((__target__("no-avx512"))); > > extern void test_arch_i386 (void) > __attribute__((__target__("arch=i386"))); > extern void test_arch_i486 (void) > __attribute__((__target__("arch=i486"))); > @@ -70,6 +72,7 @@ extern void test_arch_core2 (void) > __attribute__((__target__("arch=core2"))); > extern void test_arch_corei7 (void) > __attribute__((__target__("arch=corei7"))); > extern void test_arch_corei7_avx (void) > __attribute__((__target__("arch=corei7-avx"))); > extern void test_arch_core_avx2 (void) > __attribute__((__target__("arch=core-avx2"))); > +extern void test_arch_knl (void) > 
__attribute__((__target__("arch=knl"))); > extern void test_arch_geode (void) > __attribute__((__target__("arch=geode"))); > extern void test_arch_k6 (void) > __attribute__((__target__("arch=k6"))); > extern void test_arch_k6_2 (void) > __attribute__((__target__("arch=k6-2"))); > -- > 1.8.3.1 >