> Sorry, I must have been looking at an older version, but as I said I already
> did enable it in the latest patch. (see
> http://gcc.gnu.org/ml/gcc-patches/2013-12/msg01577.html )
Sorry for causing another revision but we would like to stick with "btver1" and "btver2" rather than "BOBCAT" or "JAGUAR". Therefore the changes would be like Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 206065) +++ gcc/config/i386/i386.c (working copy) @@ -29965,9 +29965,14 @@ P_PROC_SSE4_2, P_POPCNT, P_AVX, + P_PROC_AVX, + P_FMA4, + P_XOP, + P_PROC_XOP, + P_FMA, + P_PROC_FMA, P_AVX2, - P_FMA, - P_PROC_FMA + P_PROC_AVX2 }; enum feature_priority priority = P_ZERO; @@ -29986,11 +29991,15 @@ {"sse", P_SSE}, {"sse2", P_SSE2}, {"sse3", P_SSE3}, + {"sse4a", P_SSE4_a}, {"ssse3", P_SSSE3}, {"sse4.1", P_SSE4_1}, {"sse4.2", P_SSE4_2}, {"popcnt", P_POPCNT}, {"avx", P_AVX}, + {"fma4", P_FMA4}, + {"xop", P_XOP}, + {"fma", P_FMA}, {"avx2", P_AVX2} }; @@ -30044,25 +30053,49 @@ break; case PROCESSOR_COREI7_AVX: arg_str = "corei7-avx"; - priority = P_PROC_SSE4_2; + priority = P_PROC_AVX; break; + case PROCESSOR_HASWELL: + arg_str = "core-avx2"; + priority = P_PROC_AVX2; + break; case PROCESSOR_ATOM: arg_str = "atom"; priority = P_PROC_SSSE3; break; + case PROCESSOR_SLM: + arg_str = "slm"; + priority = P_PROC_SSE4_2; + break; case PROCESSOR_AMDFAM10: arg_str = "amdfam10h"; priority = P_PROC_SSE4_a; break; + case PROCESSOR_BTVER1: + arg_str = "btver1"; + priority = P_PROC_SSE4_a; + break; + case PROCESSOR_BTVER2: + arg_str = "btver2"; + priority = P_PROC_AVX; + break; case PROCESSOR_BDVER1: arg_str = "bdver1"; - priority = P_PROC_FMA; + priority = P_PROC_XOP; break; case PROCESSOR_BDVER2: arg_str = "bdver2"; priority = P_PROC_FMA; break; - } + case PROCESSOR_BDVER3: + arg_str = "bdver3"; + priority = P_PROC_FMA; + break; + case PROCESSOR_BDVER4: + arg_str = "bdver4"; + priority = P_PROC_AVX2; + break; + } } cl_target_option_restore (&global_options, &cur_target); @@ -30922,9 +30955,13 @@ F_SSE2, F_SSE3, F_SSSE3, + F_SSE4_a, F_SSE4_1, F_SSE4_2, F_AVX, + F_FMA4, + F_XOP, + F_FMA, F_AVX2, F_MAX }; 
@@ -30943,6 +30980,10 @@ M_AMDFAM10H, M_AMDFAM15H, M_INTEL_SLM, + M_INTEL_COREI7_AVX, + M_INTEL_CORE_AVX2, + M_AMD_BTVER1, + M_AMD_BTVER2, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -30953,7 +30994,9 @@ M_AMDFAM15H_BDVER1, M_AMDFAM15H_BDVER2, M_AMDFAM15H_BDVER3, - M_AMDFAM15H_BDVER4 + M_AMDFAM15H_BDVER4, + M_INTEL_COREI7_IVYBRIDGE, + M_INTEL_CORE_HASWELL }; static struct _arch_names_table @@ -30971,11 +31014,17 @@ {"corei7", M_INTEL_COREI7}, {"nehalem", M_INTEL_COREI7_NEHALEM}, {"westmere", M_INTEL_COREI7_WESTMERE}, + {"corei7-avx", M_INTEL_COREI7_AVX}, {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, + {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, + {"core-avx2", M_INTEL_CORE_AVX2}, + {"haswell", M_INTEL_CORE_HASWELL}, {"amdfam10h", M_AMDFAM10H}, {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, {"istanbul", M_AMDFAM10H_ISTANBUL}, + {"btver1", M_AMD_BTVER1}, + {"btver2", M_AMD_BTVER2}, {"amdfam15h", M_AMDFAM15H}, {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, @@ -30997,9 +31046,13 @@ {"sse2", F_SSE2}, {"sse3", F_SSE3}, {"ssse3", F_SSSE3}, + {"sse4a", F_SSE4_a}, {"sse4.1", F_SSE4_1}, {"sse4.2", F_SSE4_2}, {"avx", F_AVX}, + {"fma4", F_FMA4}, + {"xop", F_XOP}, + {"fma", F_FMA}, {"avx2", F_AVX2} }; Index: libgcc/config/i386/cpuinfo.c =================================================================== --- libgcc/config/i386/cpuinfo.c (revision 206065) +++ libgcc/config/i386/cpuinfo.c (working copy) @@ -62,6 +62,10 @@ AMDFAM10H, AMDFAM15H, INTEL_SLM, + INTEL_COREI7_AVX, + INTEL_CORE_AVX2, + AMD_BTVER1, + AMD_BTVER2, CPU_TYPE_MAX }; @@ -75,6 +79,10 @@ AMDFAM10H_ISTANBUL, AMDFAM15H_BDVER1, AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + INTEL_COREI7_IVYBRIDGE, + INTEL_CORE_HASWELL, CPU_SUBTYPE_MAX }; @@ -89,9 +97,13 @@ FEATURE_SSE2, FEATURE_SSE3, FEATURE_SSSE3, + FEATURE_SSE4_a, FEATURE_SSE4_1, FEATURE_SSE4_2, FEATURE_AVX, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, FEATURE_AVX2 }; @@ -113,37 
+125,46 @@ { /* AMD Family 10h. */ case 0x10: + __cpu_model.__cpu_type = AMDFAM10H; switch (model) { case 0x2: /* Barcelona. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA; break; case 0x4: /* Shanghai. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI; break; case 0x8: /* Istanbul. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL; break; default: break; } break; - /* AMD Family 15h. */ + /* AMD Family 14h "btver1". */ + case 0x14: + __cpu_model.__cpu_type = AMD_BTVER1; + break; + /* AMD Family 15h "Bulldozer". */ case 0x15: __cpu_model.__cpu_type = AMDFAM15H; /* Bulldozer version 1. */ if ( model <= 0xf) __cpu_model.__cpu_subtype = AMDFAM15H_BDVER1; - /* Bulldozer version 2. */ - if (model >= 0x10 && model <= 0x1f) - __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2; + /* Bulldozer version 2 "Piledriver" */ + if (model >= 0x10 && model <= 0x2f) + __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2; + /* Bulldozer version 3 "Steamroller" */ + if (model >= 0x30 && model <= 0x4f) + __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3; break; + /* AMD Family 16h "btver2". */ + case 0x16: + __cpu_model.__cpu_type = AMD_BTVER2; + break; default: break; } @@ -193,9 +214,21 @@ case 0x2a: case 0x2d: /* Sandy Bridge. */ - __cpu_model.__cpu_type = INTEL_COREI7; + __cpu_model.__cpu_type = INTEL_COREI7_AVX; __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE; break; + case 0x3a: + case 0x3e: + /* Ivy Bridge. */ + __cpu_model.__cpu_type = INTEL_COREI7_AVX; + __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE; + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + /* Haswell. */ + __cpu_model.__cpu_type = INTEL_CORE_AVX2; + __cpu_model.__cpu_subtype = INTEL_CORE_HASWELL; case 0x17: case 0x1d: /* Penryn. 
*/ @@ -242,6 +275,8 @@ features |= (1 << FEATURE_SSE4_2); if (ecx & bit_AVX) features |= (1 << FEATURE_AVX); + if (ecx & bit_FMA) + features |= (1 << FEATURE_FMA); /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */ if (max_cpuid_level >= 7) @@ -252,6 +287,23 @@ features |= (1 << FEATURE_AVX2); } + unsigned int ext_level; + unsigned int eax, ebx; + /* Check cpuid level of extended features. */ + __cpuid (0x80000000, ext_level, ebx, ecx, edx); + + if (ext_level > 0x80000000) + { + __cpuid (0x80000001, eax, ebx, ecx, edx); + + if (ecx & bit_SSE4a) + features |= (1 << FEATURE_SSE4_a); + if (ecx & bit_FMA4) + features |= (1 << FEATURE_FMA4); + if (ecx & bit_XOP) + features |= (1 << FEATURE_XOP); + } + __cpu_model.__cpu_features[0] = features; } Regards Ganesh