[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.

2018-09-28 Thread Phabricator via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL343343: [X86] Add the movbe instruction intrinsics from icc. 
(authored by ctopper, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D52586?vs=167416&id=167506#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D52586

Files:
  cfe/trunk/lib/Basic/Targets/X86.cpp
  cfe/trunk/lib/Headers/immintrin.h
  cfe/trunk/test/CodeGen/movbe-builtins.c
  cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Index: cfe/trunk/test/CodeGen/movbe-builtins.c
===
--- cfe/trunk/test/CodeGen/movbe-builtins.c
+++ cfe/trunk/test/CodeGen/movbe-builtins.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64
+// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s
+
+
+#include <immintrin.h>
+
+short test_loadbe_i16(const short *P) {
+  // CHECK-LABEL: @test_loadbe_i16
+  // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
+  // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
+  return _loadbe_i16(P);
+}
+
+void test_storebe_i16(short *P, short D) {
+  // CHECK-LABEL: @test_storebe_i16
+  // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}})
+  // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1
+  _storebe_i16(P, D);
+}
+
+int test_loadbe_i32(const int *P) {
+  // CHECK-LABEL: @test_loadbe_i32
+  // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1
+  // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]])
+  return _loadbe_i32(P);
+}
+
+void test_storebe_i32(int *P, int D) {
+  // CHECK-LABEL: @test_storebe_i32
+  // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}})
+  // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1
+  _storebe_i32(P, D);
+}
+
+#ifdef __x86_64__
+long long test_loadbe_i64(const long long *P) {
+  // CHECK-X64-LABEL: @test_loadbe_i64
+  // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1
+  // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]])
+  return _loadbe_i64(P);
+}
+
+void test_storebe_i64(long long *P, long long D) {
+  // CHECK-X64-LABEL: @test_storebe_i64
+  // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}})
+  // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1
+  _storebe_i64(P, D);
+}
+#endif
Index: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
===
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c
@@ -524,6 +524,7 @@
 // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __MMX__ 1
+// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1
@@ -554,6 +555,7 @@
 // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __MMX__ 1
+// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1
@@ -588,6 +590,7 @@
 // CHECK_BROADWELL_M32: #define __INVPCID__ 1
 // CHECK_BROADWELL_M32: #define __LZCNT__ 1
 // CHECK_BROADWELL_M32: #define __MMX__ 1
+// CHECK_BROADWELL_M32: #define __MOVBE__ 1
 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M32: #define __POPCNT__ 1
 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1
@@ -621,6 +624,7 @@
 // CHECK_BROADWELL_M64: #define __INVPCID__ 1
 // CHECK_BROADWELL_M64: #define __LZCNT__ 1
 // CHECK_BROADWELL_M64: #define __MMX__ 1
+// CHECK_BROADWELL_M64: #define __MOVBE__ 1
 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M64: #define __POPCNT__ 1
 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1
@@ -659,6 +663,7 @@
 // CHECK_SKL_M32: #define __INVPCID__ 1
 // CHECK_SKL_M32: #define __LZCNT__ 1
 // CHECK_SKL_M32: #define __MMX__ 1
+// CHECK_SKL_M32: #define __MOVBE__ 1
 // CHECK_SKL_M32: #define __MPX__ 1
 // CHECK_SKL_M32: #define __PCLMUL__ 1
 // CHECK_SKL_M32: #define __POPCNT__ 1
@@ -694,6 +699,7 @@
 // CHECK_SKL_M64: #define __INVPCID__ 1
 // CHECK_SKL_M64: #define __LZCNT__ 1
 // CHECK_SKL_M64: #define __MMX__ 1
+// CHECK_SKL_M64: #define __MOVBE__ 1
 // CHECK_SKL_M64: #define __MPX__ 1
 // CHECK_SKL_M64: #define __PCLMUL__ 1
 // CHECK_SKL_M64: #define __POPCNT__ 1
@@ -735,6 +741,7 @@
 // CHECK_KNL_M32: #define __FMA__ 1
 // CHECK_KNL_M32: #define __LZCNT__ 1
 // CHECK_KNL_M32: #define __MMX__ 1
+// CHECK_KNL_M32: #define __MOVBE__ 1
 // CHECK_KNL_M32: #define __PCLMUL__ 1
 // CHECK_KNL_M32: #define __POPCNT__ 1
 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1
@@ -772,6 +779,7 @@
 // CHECK_KNL_M64: #define __FMA__ 1
 

[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.

2018-09-28 Thread Phabricator via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC343343: [X86] Add the movbe instruction intrinsics from icc. 
(authored by ctopper, committed by ).

Repository:
  rC Clang

https://reviews.llvm.org/D52586

Files:
  lib/Basic/Targets/X86.cpp
  lib/Headers/immintrin.h
  test/CodeGen/movbe-builtins.c
  test/Preprocessor/predefined-arch-macros.c

Index: test/CodeGen/movbe-builtins.c
===
--- test/CodeGen/movbe-builtins.c
+++ test/CodeGen/movbe-builtins.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64
+// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s
+
+
+#include <immintrin.h>
+
+short test_loadbe_i16(const short *P) {
+  // CHECK-LABEL: @test_loadbe_i16
+  // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
+  // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
+  return _loadbe_i16(P);
+}
+
+void test_storebe_i16(short *P, short D) {
+  // CHECK-LABEL: @test_storebe_i16
+  // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}})
+  // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1
+  _storebe_i16(P, D);
+}
+
+int test_loadbe_i32(const int *P) {
+  // CHECK-LABEL: @test_loadbe_i32
+  // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1
+  // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]])
+  return _loadbe_i32(P);
+}
+
+void test_storebe_i32(int *P, int D) {
+  // CHECK-LABEL: @test_storebe_i32
+  // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}})
+  // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1
+  _storebe_i32(P, D);
+}
+
+#ifdef __x86_64__
+long long test_loadbe_i64(const long long *P) {
+  // CHECK-X64-LABEL: @test_loadbe_i64
+  // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1
+  // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]])
+  return _loadbe_i64(P);
+}
+
+void test_storebe_i64(long long *P, long long D) {
+  // CHECK-X64-LABEL: @test_storebe_i64
+  // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}})
+  // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1
+  _storebe_i64(P, D);
+}
+#endif
Index: test/Preprocessor/predefined-arch-macros.c
===
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -524,6 +524,7 @@
 // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __MMX__ 1
+// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1
@@ -554,6 +555,7 @@
 // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __MMX__ 1
+// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1
@@ -588,6 +590,7 @@
 // CHECK_BROADWELL_M32: #define __INVPCID__ 1
 // CHECK_BROADWELL_M32: #define __LZCNT__ 1
 // CHECK_BROADWELL_M32: #define __MMX__ 1
+// CHECK_BROADWELL_M32: #define __MOVBE__ 1
 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M32: #define __POPCNT__ 1
 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1
@@ -621,6 +624,7 @@
 // CHECK_BROADWELL_M64: #define __INVPCID__ 1
 // CHECK_BROADWELL_M64: #define __LZCNT__ 1
 // CHECK_BROADWELL_M64: #define __MMX__ 1
+// CHECK_BROADWELL_M64: #define __MOVBE__ 1
 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M64: #define __POPCNT__ 1
 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1
@@ -659,6 +663,7 @@
 // CHECK_SKL_M32: #define __INVPCID__ 1
 // CHECK_SKL_M32: #define __LZCNT__ 1
 // CHECK_SKL_M32: #define __MMX__ 1
+// CHECK_SKL_M32: #define __MOVBE__ 1
 // CHECK_SKL_M32: #define __MPX__ 1
 // CHECK_SKL_M32: #define __PCLMUL__ 1
 // CHECK_SKL_M32: #define __POPCNT__ 1
@@ -694,6 +699,7 @@
 // CHECK_SKL_M64: #define __INVPCID__ 1
 // CHECK_SKL_M64: #define __LZCNT__ 1
 // CHECK_SKL_M64: #define __MMX__ 1
+// CHECK_SKL_M64: #define __MOVBE__ 1
 // CHECK_SKL_M64: #define __MPX__ 1
 // CHECK_SKL_M64: #define __PCLMUL__ 1
 // CHECK_SKL_M64: #define __POPCNT__ 1
@@ -735,6 +741,7 @@
 // CHECK_KNL_M32: #define __FMA__ 1
 // CHECK_KNL_M32: #define __LZCNT__ 1
 // CHECK_KNL_M32: #define __MMX__ 1
+// CHECK_KNL_M32: #define __MOVBE__ 1
 // CHECK_KNL_M32: #define __PCLMUL__ 1
 // CHECK_KNL_M32: #define __POPCNT__ 1
 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1
@@ -772,6 +779,7 @@
 // CHECK_KNL_M64: #define __FMA__ 1
 // CHECK_KNL_M64: #define __LZCNT__ 1
 // CHECK_KNL_M64: #define __MMX__ 1
+// CHECK_KNL_M64: #define __MOVBE__ 1
 // CHECK_KNL_M64: #define __PCLMUL__ 1
 // CHECK_KNL_M64: #define __POPCNT__ 1
 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1

[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.

2018-09-28 Thread Sanjay Patel via Phabricator via cfe-commits
spatel accepted this revision.
spatel added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D52586





[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.

2018-09-27 Thread Craig Topper via Phabricator via cfe-commits
craig.topper updated this revision to Diff 167416.
craig.topper added a comment.

Add comment. Fix typo. Add preprocessor define checks to the various CPUs that have MOVBE.


https://reviews.llvm.org/D52586

Files:
  lib/Basic/Targets/X86.cpp
  lib/Headers/immintrin.h
  test/CodeGen/movbe-builtins.c
  test/Preprocessor/predefined-arch-macros.c

Index: test/Preprocessor/predefined-arch-macros.c
===
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -524,6 +524,7 @@
 // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __MMX__ 1
+// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1
@@ -554,6 +555,7 @@
 // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1
 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __MMX__ 1
+// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1
 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1
 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1
@@ -588,6 +590,7 @@
 // CHECK_BROADWELL_M32: #define __INVPCID__ 1
 // CHECK_BROADWELL_M32: #define __LZCNT__ 1
 // CHECK_BROADWELL_M32: #define __MMX__ 1
+// CHECK_BROADWELL_M32: #define __MOVBE__ 1
 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M32: #define __POPCNT__ 1
 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1
@@ -621,6 +624,7 @@
 // CHECK_BROADWELL_M64: #define __INVPCID__ 1
 // CHECK_BROADWELL_M64: #define __LZCNT__ 1
 // CHECK_BROADWELL_M64: #define __MMX__ 1
+// CHECK_BROADWELL_M64: #define __MOVBE__ 1
 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M64: #define __POPCNT__ 1
 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1
@@ -659,6 +663,7 @@
 // CHECK_SKL_M32: #define __INVPCID__ 1
 // CHECK_SKL_M32: #define __LZCNT__ 1
 // CHECK_SKL_M32: #define __MMX__ 1
+// CHECK_SKL_M32: #define __MOVBE__ 1
 // CHECK_SKL_M32: #define __MPX__ 1
 // CHECK_SKL_M32: #define __PCLMUL__ 1
 // CHECK_SKL_M32: #define __POPCNT__ 1
@@ -694,6 +699,7 @@
 // CHECK_SKL_M64: #define __INVPCID__ 1
 // CHECK_SKL_M64: #define __LZCNT__ 1
 // CHECK_SKL_M64: #define __MMX__ 1
+// CHECK_SKL_M64: #define __MOVBE__ 1
 // CHECK_SKL_M64: #define __MPX__ 1
 // CHECK_SKL_M64: #define __PCLMUL__ 1
 // CHECK_SKL_M64: #define __POPCNT__ 1
@@ -735,6 +741,7 @@
 // CHECK_KNL_M32: #define __FMA__ 1
 // CHECK_KNL_M32: #define __LZCNT__ 1
 // CHECK_KNL_M32: #define __MMX__ 1
+// CHECK_KNL_M32: #define __MOVBE__ 1
 // CHECK_KNL_M32: #define __PCLMUL__ 1
 // CHECK_KNL_M32: #define __POPCNT__ 1
 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1
@@ -772,6 +779,7 @@
 // CHECK_KNL_M64: #define __FMA__ 1
 // CHECK_KNL_M64: #define __LZCNT__ 1
 // CHECK_KNL_M64: #define __MMX__ 1
+// CHECK_KNL_M64: #define __MOVBE__ 1
 // CHECK_KNL_M64: #define __PCLMUL__ 1
 // CHECK_KNL_M64: #define __POPCNT__ 1
 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1
@@ -813,6 +821,7 @@
 // CHECK_KNM_M32: #define __FMA__ 1
 // CHECK_KNM_M32: #define __LZCNT__ 1
 // CHECK_KNM_M32: #define __MMX__ 1
+// CHECK_KNM_M32: #define __MOVBE__ 1
 // CHECK_KNM_M32: #define __PCLMUL__ 1
 // CHECK_KNM_M32: #define __POPCNT__ 1
 // CHECK_KNM_M32: #define __PREFETCHWT1__ 1
@@ -848,6 +857,7 @@
 // CHECK_KNM_M64: #define __FMA__ 1
 // CHECK_KNM_M64: #define __LZCNT__ 1
 // CHECK_KNM_M64: #define __MMX__ 1
+// CHECK_KNM_M64: #define __MOVBE__ 1
 // CHECK_KNM_M64: #define __PCLMUL__ 1
 // CHECK_KNM_M64: #define __POPCNT__ 1
 // CHECK_KNM_M64: #define __PREFETCHWT1__ 1
@@ -889,6 +899,7 @@
 // CHECK_SKX_M32: #define __INVPCID__ 1
 // CHECK_SKX_M32: #define __LZCNT__ 1
 // CHECK_SKX_M32: #define __MMX__ 1
+// CHECK_SKX_M32: #define __MOVBE__ 1
 // CHECK_SKX_M32: #define __MPX__ 1
 // CHECK_SKX_M32: #define __PCLMUL__ 1
 // CHECK_SKX_M32: #define __PKU__ 1
@@ -935,6 +946,7 @@
 // CHECK_SKX_M64: #define __INVPCID__ 1
 // CHECK_SKX_M64: #define __LZCNT__ 1
 // CHECK_SKX_M64: #define __MMX__ 1
+// CHECK_SKX_M64: #define __MOVBE__ 1
 // CHECK_SKX_M64: #define __MPX__ 1
 // CHECK_SKX_M64: #define __PCLMUL__ 1
 // CHECK_SKX_M64: #define __PKU__ 1
@@ -986,6 +998,7 @@
 // CHECK_CNL_M32: #define __INVPCID__ 1
 // CHECK_CNL_M32: #define __LZCNT__ 1
 // CHECK_CNL_M32: #define __MMX__ 1
+// CHECK_CNL_M32: #define __MOVBE__ 1
 // CHECK_CNL_M32: #define __MPX__ 1
 // CHECK_CNL_M32: #define __PCLMUL__ 1
 // CHECK_CNL_M32: #define __PKU__ 1
@@ -1035,6 +1048,7 @@
 // CHECK_CNL_M64: #define __INVPCID__ 1
 // CHECK_CNL_M64: #define __LZCNT__ 1
 // CHECK_CNL_M64: #define __MMX__ 1
+// CHECK_CNL_M64: #define __MOVBE__ 1
 // CHECK_CNL_M64: #define __MPX__ 1
 // CHECK_CNL_M64: #define __PCLMUL__ 1
 // CHECK_CNL_M64: #define __PKU__ 1
@@ -1090,6 +1104,7 @@
 // CHECK_ICL_M32: #define __INVPCID__ 1
 // CHECK_ICL_M32: #define __LZCNT__ 1
 // CHECK_ICL_M32: #define __MMX__ 1
+// CHECK_ICL_M32: #define __MOVBE__ 1

[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.

2018-09-27 Thread Sanjay Patel via Phabricator via cfe-commits
spatel added a comment.

The struct hack isn't obvious to me. Without it, we would produce a load with
default alignment based on the size of the load (i32 -> align 4, etc.)? But we
want to force align 1 regardless of the load size, and the __packed__ attribute
on the struct gets us that, IIUC. What does __may_alias__ do?

Could you explain this in a code comment in the header to make it less tricky?
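
(For reference, not part of the patch: a minimal sketch, with made-up names, of
what the two attributes do here. __packed__ drops the wrapper struct's alignment
requirement to 1, so the member access is emitted as an align-1 load; __may_alias__
exempts the struct type from type-based alias analysis, so reading through the
cast pointer doesn't run afoul of strict aliasing.)

  /* Illustrative sketch only; "unaligned_load_i32" and "__wrap_i32" are
     made-up names mirroring the trick used in the patch. */
  static inline int unaligned_load_i32(const void *p) {
    struct __wrap_i32 {
      int __v;
    } __attribute__((__packed__, __may_alias__)); /* align 1, may alias any type */
    return ((const struct __wrap_i32 *)p)->__v;   /* emitted as: load i32, align 1 */
  }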




Comment at: lib/Headers/immintrin.h:359
+#endif
+#endif /* __MOVEBE */
+

MOVEBE -> MOVBE


https://reviews.llvm.org/D52586





[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.

2018-09-26 Thread Craig Topper via Phabricator via cfe-commits
craig.topper created this revision.
craig.topper added reviewers: spatel, RKSimon.

These intrinsics exist in icc; they can be found on the Intel Intrinsics Guide
website.

All the backend support is in place to pattern match a load+bswap or a
bswap+store into the MOVBE instructions, so we just need the frontend to emit
the correct IR. The pointer arguments in icc are declared as void*, so I had to
jump through a packed struct to force a specific alignment on the load/store.
It's the same trick we use in the unaligned vector load/store intrinsics.
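
(Usage illustration, not from the patch; read_be32/write_be32 are hypothetical
helpers. They read and write a 32-bit big-endian field in a byte buffer, and the
resulting load+bswap / bswap+store IR is what the backend should pattern match
to MOVBE as described above.)

  #include <immintrin.h>

  /* Hypothetical helpers: the buffer holds a 32-bit big-endian value at a
     possibly unaligned offset. */
  static int read_be32(const unsigned char *buf) {
    return _loadbe_i32(buf);    /* align-1 load + byte swap */
  }

  static void write_be32(unsigned char *buf, int value) {
    _storebe_i32(buf, value);   /* byte swap + align-1 store */
  }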


https://reviews.llvm.org/D52586

Files:
  lib/Basic/Targets/X86.cpp
  lib/Headers/immintrin.h
  test/CodeGen/movbe-builtins.c

Index: test/CodeGen/movbe-builtins.c
===
--- /dev/null
+++ test/CodeGen/movbe-builtins.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64
+// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s
+
+
+#include <immintrin.h>
+
+short test_loadbe_i16(const short *P) {
+  // CHECK-LABEL: @test_loadbe_i16
+  // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
+  // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
+  return _loadbe_i16(P);
+}
+
+void test_storebe_i16(short *P, short D) {
+  // CHECK-LABEL: @test_storebe_i16
+  // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}})
+  // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1
+  _storebe_i16(P, D);
+}
+
+int test_loadbe_i32(const int *P) {
+  // CHECK-LABEL: @test_loadbe_i32
+  // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1
+  // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]])
+  return _loadbe_i32(P);
+}
+
+void test_storebe_i32(int *P, int D) {
+  // CHECK-LABEL: @test_storebe_i32
+  // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}})
+  // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1
+  _storebe_i32(P, D);
+}
+
+#ifdef __x86_64__
+long long test_loadbe_i64(const long long *P) {
+  // CHECK-X64-LABEL: @test_loadbe_i64
+  // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1
+  // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]])
+  return _loadbe_i64(P);
+}
+
+void test_storebe_i64(long long *P, long long D) {
+  // CHECK-X64-LABEL: @test_storebe_i64
+  // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}})
+  // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1
+  _storebe_i64(P, D);
+}
+#endif
Index: lib/Headers/immintrin.h
===
--- lib/Headers/immintrin.h
+++ lib/Headers/immintrin.h
@@ -306,6 +306,58 @@
 #endif
 #endif /* __FSGSBASE__ */
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
+static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i16(void const * __P) {
+  struct __loadu_i16 {
+    short __v;
+  } __attribute__((__packed__, __may_alias__));
+  return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i16(void * __P, short __D) {
+  struct __storeu_i16 {
+    short __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
+}
+
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i32(void const * __P) {
+  struct __loadu_i32 {
+    int __v;
+  } __attribute__((__packed__, __may_alias__));
+  return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i32(void * __P, int __D) {
+  struct __storeu_i32 {
+    int __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
+}
+
+#ifdef __x86_64__
+static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i64(void const * __P) {
+  struct __loadu_i64 {
+    long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i64(void * __P, long long __D) {
+  struct __storeu_i64 {
+    long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
+}
+#endif
+#endif /* __MOVEBE */
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
 #include <rtmintrin.h>
 #include <xtestintrin.h>
Index: lib/Basic/Targets/X86.cpp
===
--- lib/Basic/Targets/X86.cpp
+++ lib/Basic/Targets/X86.cpp
@@ -1081,6 +1081,9 @@
   if (HasMWAITX)
 Builder.defineMacro("__MWAITX__");
 
+  if (HasMOVBE)
+