craig.topper updated this revision to Diff 167416.
craig.topper added a comment.

Add comment. Fix typo. Add preprocessor define checks to the various CPUs that 
have MOVBE


https://reviews.llvm.org/D52586

Files:
  lib/Basic/Targets/X86.cpp
  lib/Headers/immintrin.h
  test/CodeGen/movbe-builtins.c
  test/Preprocessor/predefined-arch-macros.c

Index: test/Preprocessor/predefined-arch-macros.c
===================================================================
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -524,6 +524,7 @@
 // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __MMX__ 1
+// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1
@@ -554,6 +555,7 @@
 // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __MMX__ 1
+// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1
@@ -588,6 +590,7 @@
 // CHECK_BROADWELL_M32: #define __INVPCID__ 1
 // CHECK_BROADWELL_M32: #define __LZCNT__ 1
 // CHECK_BROADWELL_M32: #define __MMX__ 1
+// CHECK_BROADWELL_M32: #define __MOVBE__ 1
 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M32: #define __POPCNT__ 1
 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1
@@ -621,6 +624,7 @@
 // CHECK_BROADWELL_M64: #define __INVPCID__ 1
 // CHECK_BROADWELL_M64: #define __LZCNT__ 1
 // CHECK_BROADWELL_M64: #define __MMX__ 1
+// CHECK_BROADWELL_M64: #define __MOVBE__ 1
 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M64: #define __POPCNT__ 1
 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1
@@ -659,6 +663,7 @@
 // CHECK_SKL_M32: #define __INVPCID__ 1
 // CHECK_SKL_M32: #define __LZCNT__ 1
 // CHECK_SKL_M32: #define __MMX__ 1
+// CHECK_SKL_M32: #define __MOVBE__ 1
 // CHECK_SKL_M32: #define __MPX__ 1
 // CHECK_SKL_M32: #define __PCLMUL__ 1
 // CHECK_SKL_M32: #define __POPCNT__ 1
@@ -694,6 +699,7 @@
 // CHECK_SKL_M64: #define __INVPCID__ 1
 // CHECK_SKL_M64: #define __LZCNT__ 1
 // CHECK_SKL_M64: #define __MMX__ 1
+// CHECK_SKL_M64: #define __MOVBE__ 1
 // CHECK_SKL_M64: #define __MPX__ 1
 // CHECK_SKL_M64: #define __PCLMUL__ 1
 // CHECK_SKL_M64: #define __POPCNT__ 1
@@ -735,6 +741,7 @@
 // CHECK_KNL_M32: #define __FMA__ 1
 // CHECK_KNL_M32: #define __LZCNT__ 1
 // CHECK_KNL_M32: #define __MMX__ 1
+// CHECK_KNL_M32: #define __MOVBE__ 1
 // CHECK_KNL_M32: #define __PCLMUL__ 1
 // CHECK_KNL_M32: #define __POPCNT__ 1
 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1
@@ -772,6 +779,7 @@
 // CHECK_KNL_M64: #define __FMA__ 1
 // CHECK_KNL_M64: #define __LZCNT__ 1
 // CHECK_KNL_M64: #define __MMX__ 1
+// CHECK_KNL_M64: #define __MOVBE__ 1
 // CHECK_KNL_M64: #define __PCLMUL__ 1
 // CHECK_KNL_M64: #define __POPCNT__ 1
 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1
@@ -813,6 +821,7 @@
 // CHECK_KNM_M32: #define __FMA__ 1
 // CHECK_KNM_M32: #define __LZCNT__ 1
 // CHECK_KNM_M32: #define __MMX__ 1
+// CHECK_KNM_M32: #define __MOVBE__ 1
 // CHECK_KNM_M32: #define __PCLMUL__ 1
 // CHECK_KNM_M32: #define __POPCNT__ 1
 // CHECK_KNM_M32: #define __PREFETCHWT1__ 1
@@ -848,6 +857,7 @@
 // CHECK_KNM_M64: #define __FMA__ 1
 // CHECK_KNM_M64: #define __LZCNT__ 1
 // CHECK_KNM_M64: #define __MMX__ 1
+// CHECK_KNM_M64: #define __MOVBE__ 1
 // CHECK_KNM_M64: #define __PCLMUL__ 1
 // CHECK_KNM_M64: #define __POPCNT__ 1
 // CHECK_KNM_M64: #define __PREFETCHWT1__ 1
@@ -889,6 +899,7 @@
 // CHECK_SKX_M32: #define __INVPCID__ 1
 // CHECK_SKX_M32: #define __LZCNT__ 1
 // CHECK_SKX_M32: #define __MMX__ 1
+// CHECK_SKX_M32: #define __MOVBE__ 1
 // CHECK_SKX_M32: #define __MPX__ 1
 // CHECK_SKX_M32: #define __PCLMUL__ 1
 // CHECK_SKX_M32: #define __PKU__ 1
@@ -935,6 +946,7 @@
 // CHECK_SKX_M64: #define __INVPCID__ 1
 // CHECK_SKX_M64: #define __LZCNT__ 1
 // CHECK_SKX_M64: #define __MMX__ 1
+// CHECK_SKX_M64: #define __MOVBE__ 1
 // CHECK_SKX_M64: #define __MPX__ 1
 // CHECK_SKX_M64: #define __PCLMUL__ 1
 // CHECK_SKX_M64: #define __PKU__ 1
@@ -986,6 +998,7 @@
 // CHECK_CNL_M32: #define __INVPCID__ 1
 // CHECK_CNL_M32: #define __LZCNT__ 1
 // CHECK_CNL_M32: #define __MMX__ 1
+// CHECK_CNL_M32: #define __MOVBE__ 1
 // CHECK_CNL_M32: #define __MPX__ 1
 // CHECK_CNL_M32: #define __PCLMUL__ 1
 // CHECK_CNL_M32: #define __PKU__ 1
@@ -1035,6 +1048,7 @@
 // CHECK_CNL_M64: #define __INVPCID__ 1
 // CHECK_CNL_M64: #define __LZCNT__ 1
 // CHECK_CNL_M64: #define __MMX__ 1
+// CHECK_CNL_M64: #define __MOVBE__ 1
 // CHECK_CNL_M64: #define __MPX__ 1
 // CHECK_CNL_M64: #define __PCLMUL__ 1
 // CHECK_CNL_M64: #define __PKU__ 1
@@ -1090,6 +1104,7 @@
 // CHECK_ICL_M32: #define __INVPCID__ 1
 // CHECK_ICL_M32: #define __LZCNT__ 1
 // CHECK_ICL_M32: #define __MMX__ 1
+// CHECK_ICL_M32: #define __MOVBE__ 1
 // CHECK_ICL_M32: #define __MPX__ 1
 // CHECK_ICL_M32: #define __PCLMUL__ 1
 // CHECK_ICL_M32: #define __PKU__ 1
@@ -1148,6 +1163,7 @@
 // CHECK_ICL_M64: #define __INVPCID__ 1
 // CHECK_ICL_M64: #define __LZCNT__ 1
 // CHECK_ICL_M64: #define __MMX__ 1
+// CHECK_ICL_M64: #define __MOVBE__ 1
 // CHECK_ICL_M64: #define __MPX__ 1
 // CHECK_ICL_M64: #define __PCLMUL__ 1
 // CHECK_ICL_M64: #define __PKU__ 1
@@ -1207,6 +1223,7 @@
 // CHECK_ICX_M32: #define __INVPCID__ 1
 // CHECK_ICX_M32: #define __LZCNT__ 1
 // CHECK_ICX_M32: #define __MMX__ 1
+// CHECK_ICX_M32: #define __MOVBE__ 1
 // CHECK_ICX_M32: #define __MPX__ 1
 // CHECK_ICX_M32: #define __PCLMUL__ 1
 // CHECK_ICX_M32: #define __PCONFIG__ 1
@@ -1266,6 +1283,7 @@
 // CHECK_ICX_M64: #define __INVPCID__ 1
 // CHECK_ICX_M64: #define __LZCNT__ 1
 // CHECK_ICX_M64: #define __MMX__ 1
+// CHECK_ICX_M64: #define __MOVBE__ 1
 // CHECK_ICX_M64: #define __MPX__ 1
 // CHECK_ICX_M64: #define __PCLMUL__ 1
 // CHECK_ICX_M64: #define __PCONFIG__ 1
@@ -1303,6 +1321,7 @@
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATOM_M32
 // CHECK_ATOM_M32: #define __MMX__ 1
+// CHECK_ATOM_M32: #define __MOVBE__ 1
 // CHECK_ATOM_M32: #define __SSE2__ 1
 // CHECK_ATOM_M32: #define __SSE3__ 1
 // CHECK_ATOM_M32: #define __SSE__ 1
@@ -1318,6 +1337,7 @@
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATOM_M64
 // CHECK_ATOM_M64: #define __MMX__ 1
+// CHECK_ATOM_M64: #define __MOVBE__ 1
 // CHECK_ATOM_M64: #define __SSE2_MATH__ 1
 // CHECK_ATOM_M64: #define __SSE2__ 1
 // CHECK_ATOM_M64: #define __SSE3__ 1
@@ -1340,6 +1360,7 @@
 // CHECK_GLM_M32: #define __FSGSBASE__ 1
 // CHECK_GLM_M32: #define __FXSR__ 1
 // CHECK_GLM_M32: #define __MMX__ 1
+// CHECK_GLM_M32: #define __MOVBE__ 1
 // CHECK_GLM_M32: #define __MPX__ 1
 // CHECK_GLM_M32: #define __PCLMUL__ 1
 // CHECK_GLM_M32: #define __POPCNT__ 1
@@ -1373,6 +1394,7 @@
 // CHECK_GLM_M64: #define __FSGSBASE__ 1
 // CHECK_GLM_M64: #define __FXSR__ 1
 // CHECK_GLM_M64: #define __MMX__ 1
+// CHECK_GLM_M64: #define __MOVBE__ 1
 // CHECK_GLM_M64: #define __MPX__ 1
 // CHECK_GLM_M64: #define __PCLMUL__ 1
 // CHECK_GLM_M64: #define __POPCNT__ 1
@@ -1404,6 +1426,7 @@
 // CHECK_GLMP_M32: #define __FSGSBASE__ 1
 // CHECK_GLMP_M32: #define __FXSR__ 1
 // CHECK_GLMP_M32: #define __MMX__ 1
+// CHECK_GLMP_M32: #define __MOVBE__ 1
 // CHECK_GLMP_M32: #define __MPX__ 1
 // CHECK_GLMP_M32: #define __PCLMUL__ 1
 // CHECK_GLMP_M32: #define __POPCNT__ 1
@@ -1440,6 +1463,7 @@
 // CHECK_GLMP_M64: #define __FSGSBASE__ 1
 // CHECK_GLMP_M64: #define __FXSR__ 1
 // CHECK_GLMP_M64: #define __MMX__ 1
+// CHECK_GLMP_M64: #define __MOVBE__ 1
 // CHECK_GLMP_M64: #define __MPX__ 1
 // CHECK_GLMP_M64: #define __PCLMUL__ 1
 // CHECK_GLMP_M64: #define __POPCNT__ 1
@@ -1476,6 +1500,7 @@
 // CHECK_TRM_M32: #define __FXSR__ 1
 // CHECK_TRM_M32: #define __GFNI__ 1
 // CHECK_TRM_M32: #define __MMX__ 1
+// CHECK_TRM_M32: #define __MOVBE__ 1
 // CHECK_TRM_M32: #define __MOVDIR64B__ 1
 // CHECK_TRM_M32: #define __MOVDIRI__ 1
 // CHECK_TRM_M32: #define __MPX__ 1
@@ -1517,6 +1542,7 @@
 // CHECK_TRM_M64: #define __FXSR__ 1
 // CHECK_TRM_M64: #define __GFNI__ 1
 // CHECK_TRM_M64: #define __MMX__ 1
+// CHECK_TRM_M64: #define __MOVBE__ 1
 // CHECK_TRM_M64: #define __MOVDIR64B__ 1
 // CHECK_TRM_M64: #define __MOVDIRI__ 1
 // CHECK_TRM_M64: #define __MPX__ 1
@@ -1551,6 +1577,7 @@
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M32
 // CHECK_SLM_M32: #define __FXSR__ 1
 // CHECK_SLM_M32: #define __MMX__ 1
+// CHECK_SLM_M32: #define __MOVBE__ 1
 // CHECK_SLM_M32: #define __PCLMUL__ 1
 // CHECK_SLM_M32: #define __POPCNT__ 1
 // CHECK_SLM_M32: #define __PRFCHW__ 1
@@ -1573,6 +1600,7 @@
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M64
 // CHECK_SLM_M64: #define __FXSR__ 1
 // CHECK_SLM_M64: #define __MMX__ 1
+// CHECK_SLM_M64: #define __MOVBE__ 1
 // CHECK_SLM_M64: #define __PCLMUL__ 1
 // CHECK_SLM_M64: #define __POPCNT__ 1
 // CHECK_SLM_M64: #define __PRFCHW__ 1
@@ -2134,6 +2162,7 @@
 // CHECK_BTVER2_M32: #define __F16C__ 1
 // CHECK_BTVER2_M32: #define __LZCNT__ 1
 // CHECK_BTVER2_M32: #define __MMX__ 1
+// CHECK_BTVER2_M32: #define __MOVBE__ 1
 // CHECK_BTVER2_M32: #define __PCLMUL__ 1
 // CHECK_BTVER2_M32: #define __POPCNT__ 1
 // CHECK_BTVER2_M32: #define __PRFCHW__ 1
@@ -2163,6 +2192,7 @@
 // CHECK_BTVER2_M64: #define __F16C__ 1
 // CHECK_BTVER2_M64: #define __LZCNT__ 1
 // CHECK_BTVER2_M64: #define __MMX__ 1
+// CHECK_BTVER2_M64: #define __MOVBE__ 1
 // CHECK_BTVER2_M64: #define __PCLMUL__ 1
 // CHECK_BTVER2_M64: #define __POPCNT__ 1
 // CHECK_BTVER2_M64: #define __PRFCHW__ 1
@@ -2491,6 +2521,7 @@
 // CHECK_ZNVER1_M32: #define __FSGSBASE__ 1
 // CHECK_ZNVER1_M32: #define __LZCNT__ 1
 // CHECK_ZNVER1_M32: #define __MMX__ 1
+// CHECK_ZNVER1_M32: #define __MOVBE__ 1
 // CHECK_ZNVER1_M32: #define __PCLMUL__ 1
 // CHECK_ZNVER1_M32: #define __POPCNT__ 1
 // CHECK_ZNVER1_M32: #define __PRFCHW__ 1
@@ -2534,6 +2565,7 @@
 // CHECK_ZNVER1_M64: #define __FSGSBASE__ 1
 // CHECK_ZNVER1_M64: #define __LZCNT__ 1
 // CHECK_ZNVER1_M64: #define __MMX__ 1
+// CHECK_ZNVER1_M64: #define __MOVBE__ 1
 // CHECK_ZNVER1_M64: #define __PCLMUL__ 1
 // CHECK_ZNVER1_M64: #define __POPCNT__ 1
 // CHECK_ZNVER1_M64: #define __PRFCHW__ 1
Index: test/CodeGen/movbe-builtins.c
===================================================================
--- /dev/null
+++ test/CodeGen/movbe-builtins.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64
+// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s
+
+
+#include <immintrin.h>
+
+short test_loadbe_i16(const short *P) {
+  // CHECK-LABEL: @test_loadbe_i16
+  // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
+  // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
+  return _loadbe_i16(P);
+}
+
+void test_storebe_i16(short *P, short D) {
+  // CHECK-LABEL: @test_storebe_i16
+  // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}})
+  // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1
+  _storebe_i16(P, D);
+}
+
+int test_loadbe_i32(const int *P) {
+  // CHECK-LABEL: @test_loadbe_i32
+  // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1
+  // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]])
+  return _loadbe_i32(P);
+}
+
+void test_storebe_i32(int *P, int D) {
+  // CHECK-LABEL: @test_storebe_i32
+  // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}})
+  // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1
+  _storebe_i32(P, D);
+}
+
+#ifdef __x86_64__
+long long test_loadbe_i64(const long long *P) {
+  // CHECK-X64-LABEL: @test_loadbe_i64
+  // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1
+  // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]])
+  return _loadbe_i64(P);
+}
+
+void test_storebe_i64(long long *P, long long D) {
+  // CHECK-X64-LABEL: @test_storebe_i64
+  // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}})
+  // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1
+  _storebe_i64(P, D);
+}
+#endif
Index: lib/Headers/immintrin.h
===================================================================
--- lib/Headers/immintrin.h
+++ lib/Headers/immintrin.h
@@ -306,6 +306,65 @@
 #endif
 #endif /* __FSGSBASE__ */
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
+
+/* The structs used below are to force the load/store to be unaligned. This
+ * is accomplished with the __packed__ attribute. The __may_alias__ prevents
+ * tbaa metadata from being generated based on the struct and the type of the
+ * field inside of it.
+ */
+
+static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i16(void const * __P) {
+  struct __loadu_i16 {
+    short __v;
+  } __attribute__((__packed__, __may_alias__));
+  return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i16(void * __P, short __D) {
+  struct __storeu_i16 {
+    short __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
+}
+
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i32(void const * __P) {
+  struct __loadu_i32 {
+    int __v;
+  } __attribute__((__packed__, __may_alias__));
+  return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i32(void * __P, int __D) {
+  struct __storeu_i32 {
+    int __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
+}
+
+#ifdef __x86_64__
+static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i64(void const * __P) {
+  struct __loadu_i64 {
+    long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i64(void * __P, long long __D) {
+  struct __storeu_i64 {
+    long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
+}
+#endif
+#endif /* __MOVBE */
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
 #include <rtmintrin.h>
 #include <xtestintrin.h>
Index: lib/Basic/Targets/X86.cpp
===================================================================
--- lib/Basic/Targets/X86.cpp
+++ lib/Basic/Targets/X86.cpp
@@ -1081,6 +1081,9 @@
   if (HasMWAITX)
     Builder.defineMacro("__MWAITX__");
 
+  if (HasMOVBE)
+    Builder.defineMacro("__MOVBE__");
+
   switch (XOPLevel) {
   case XOP:
     Builder.defineMacro("__XOP__");
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to