Hi craig.topper,
According to the ISA extension manual, the EVEX-encoded versions of the FMA3 and
F16C instructions are a core part of AVX512F, so it seems to me we should enable
the fma and f16c features whenever avx512f is enabled (e.g., via -mavx512f).
Otherwise, users must either name a particular CPU or add the redundant
-mfma/-mf16c flags.
http://reviews.llvm.org/D10617
Files:
lib/Basic/Targets.cpp
test/Preprocessor/x86_target_features.c
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2520,6 +2520,7 @@
setFeatureEnabledImpl(Features, "cx16", true);
break;
case CK_KNL:
+ // avx512f implies fma and f16c.
setFeatureEnabledImpl(Features, "avx512f", true);
setFeatureEnabledImpl(Features, "avx512cd", true);
setFeatureEnabledImpl(Features, "avx512er", true);
@@ -2530,9 +2531,7 @@
setFeatureEnabledImpl(Features, "bmi", true);
setFeatureEnabledImpl(Features, "bmi2", true);
setFeatureEnabledImpl(Features, "rtm", true);
- setFeatureEnabledImpl(Features, "fma", true);
setFeatureEnabledImpl(Features, "rdrnd", true);
- setFeatureEnabledImpl(Features, "f16c", true);
setFeatureEnabledImpl(Features, "fsgsbase", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
@@ -2619,6 +2618,8 @@
switch (Level) {
case AVX512F:
Features["avx512f"] = true;
+ Features["fma"] = true;
+ Features["f16c"] = true;
case AVX2:
Features["avx2"] = true;
case AVX:
@@ -2665,7 +2666,7 @@
case AVX512F:
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
- Features["avx512vl"] = false;
+ Features["avx512vl"] = Features["fma"] = Features["f16c"] = false;
}
}
Index: test/Preprocessor/x86_target_features.c
===================================================================
--- test/Preprocessor/x86_target_features.c
+++ test/Preprocessor/x86_target_features.c
@@ -64,6 +64,8 @@
// AVX512F: #define __AVX2__ 1
// AVX512F: #define __AVX512F__ 1
// AVX512F: #define __AVX__ 1
+// AVX512F: #define __F16C__ 1
+// AVX512F: #define __FMA__ 1
// AVX512F: #define __SSE2_MATH__ 1
// AVX512F: #define __SSE2__ 1
// AVX512F: #define __SSE3__ 1
@@ -79,6 +81,8 @@
// AVX512CD: #define __AVX512CD__ 1
// AVX512CD: #define __AVX512F__ 1
// AVX512CD: #define __AVX__ 1
+// AVX512CD: #define __F16C__ 1
+// AVX512CD: #define __FMA__ 1
// AVX512CD: #define __SSE2_MATH__ 1
// AVX512CD: #define __SSE2__ 1
// AVX512CD: #define __SSE3__ 1
@@ -94,6 +98,8 @@
// AVX512ER: #define __AVX512ER__ 1
// AVX512ER: #define __AVX512F__ 1
// AVX512ER: #define __AVX__ 1
+// AVX512ER: #define __F16C__ 1
+// AVX512ER: #define __FMA__ 1
// AVX512ER: #define __SSE2_MATH__ 1
// AVX512ER: #define __SSE2__ 1
// AVX512ER: #define __SSE3__ 1
@@ -109,6 +115,8 @@
// AVX512PF: #define __AVX512F__ 1
// AVX512PF: #define __AVX512PF__ 1
// AVX512PF: #define __AVX__ 1
+// AVX512PF: #define __F16C__ 1
+// AVX512PF: #define __FMA__ 1
// AVX512PF: #define __SSE2_MATH__ 1
// AVX512PF: #define __SSE2__ 1
// AVX512PF: #define __SSE3__ 1
@@ -124,6 +132,8 @@
// AVX512DQ: #define __AVX512DQ__ 1
// AVX512DQ: #define __AVX512F__ 1
// AVX512DQ: #define __AVX__ 1
+// AVX512DQ: #define __F16C__ 1
+// AVX512DQ: #define __FMA__ 1
// AVX512DQ: #define __SSE2_MATH__ 1
// AVX512DQ: #define __SSE2__ 1
// AVX512DQ: #define __SSE3__ 1
@@ -139,6 +149,8 @@
// AVX512BW: #define __AVX512BW__ 1
// AVX512BW: #define __AVX512F__ 1
// AVX512BW: #define __AVX__ 1
+// AVX512BW: #define __F16C__ 1
+// AVX512BW: #define __FMA__ 1
// AVX512BW: #define __SSE2_MATH__ 1
// AVX512BW: #define __SSE2__ 1
// AVX512BW: #define __SSE3__ 1
@@ -154,6 +166,8 @@
// AVX512VL: #define __AVX512F__ 1
// AVX512VL: #define __AVX512VL__ 1
// AVX512VL: #define __AVX__ 1
+// AVX512VL: #define __F16C__ 1
+// AVX512VL: #define __FMA__ 1
// AVX512VL: #define __SSE2_MATH__ 1
// AVX512VL: #define __SSE2__ 1
// AVX512VL: #define __SSE3__ 1
@@ -168,6 +182,8 @@
// AVX512F2: #define __AVX2__ 1
// AVX512F2-NOT: #define __AVX512F__ 1
// AVX512F2-NOT: #define __AVX512PF__ 1
+// AVX512F2-NOT: #define __F16C__ 1
+// AVX512F2-NOT: #define __FMA__ 1
// AVX512F2: #define __AVX__ 1
// AVX512F2: #define __SSE2_MATH__ 1
// AVX512F2: #define __SSE2__ 1
_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits