[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.
This revision was automatically updated to reflect the committed changes. Closed by commit rL343343: [X86] Add the movbe instruction intrinsics from icc. (authored by ctopper, committed by ). Herald added a subscriber: llvm-commits. Changed prior to commit: https://reviews.llvm.org/D52586?vs=167416=167506#toc Repository: rL LLVM https://reviews.llvm.org/D52586 Files: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/lib/Headers/immintrin.h cfe/trunk/test/CodeGen/movbe-builtins.c cfe/trunk/test/Preprocessor/predefined-arch-macros.c Index: cfe/trunk/test/CodeGen/movbe-builtins.c === --- cfe/trunk/test/CodeGen/movbe-builtins.c +++ cfe/trunk/test/CodeGen/movbe-builtins.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64 +// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s + + +#include + +short test_loadbe_i16(const short *P) { + // CHECK-LABEL: @test_loadbe_i16 + // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1 + // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]]) + return _loadbe_i16(P); +} + +void test_storebe_i16(short *P, short D) { + // CHECK-LABEL: @test_storebe_i16 + // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}}) + // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1 + _storebe_i16(P, D); +} + +int test_loadbe_i32(const int *P) { + // CHECK-LABEL: @test_loadbe_i32 + // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1 + // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]]) + return _loadbe_i32(P); +} + +void test_storebe_i32(int *P, int D) { + // CHECK-LABEL: @test_storebe_i32 + // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}}) + // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1 + _storebe_i32(P, D); +} + +#ifdef __x86_64__ +long long test_loadbe_i64(const long long *P) { + // CHECK-X64-LABEL: @test_loadbe_i64 + // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1 + // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]]) + return _loadbe_i64(P); +} + +void test_storebe_i64(long long *P, long long D) { + // CHECK-X64-LABEL: @test_storebe_i64 + // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}}) + // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1 + _storebe_i64(P, D); +} +#endif Index: cfe/trunk/test/Preprocessor/predefined-arch-macros.c === --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c @@ -524,6 +524,7 @@ // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M32: #define __MMX__ 1 +// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1 @@ -554,6 +555,7 @@ // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M64: #define __MMX__ 1 +// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1 @@ -588,6 +590,7 @@ // CHECK_BROADWELL_M32: #define __INVPCID__ 1 // CHECK_BROADWELL_M32: #define __LZCNT__ 1 // CHECK_BROADWELL_M32: #define __MMX__ 1 +// CHECK_BROADWELL_M32: #define __MOVBE__ 1 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1 // CHECK_BROADWELL_M32: #define __POPCNT__ 1 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1 @@ -621,6 +624,7 @@ // CHECK_BROADWELL_M64: #define __INVPCID__ 1 // CHECK_BROADWELL_M64: #define __LZCNT__ 1 // CHECK_BROADWELL_M64: #define __MMX__ 1 +// CHECK_BROADWELL_M64: #define __MOVBE__ 1 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1 // CHECK_BROADWELL_M64: #define __POPCNT__ 1 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1 @@ -659,6 +663,7 @@ // CHECK_SKL_M32: #define __INVPCID__ 1 // CHECK_SKL_M32: #define __LZCNT__ 1 // CHECK_SKL_M32: #define __MMX__ 1 +// CHECK_SKL_M32: #define __MOVBE__ 1 // CHECK_SKL_M32: #define __MPX__ 1 // CHECK_SKL_M32: #define __PCLMUL__ 1 // CHECK_SKL_M32: #define __POPCNT__ 1 @@ -694,6 +699,7 @@ // CHECK_SKL_M64: #define __INVPCID__ 1 // CHECK_SKL_M64: #define __LZCNT__ 1 // CHECK_SKL_M64: #define __MMX__ 1 +// CHECK_SKL_M64: #define __MOVBE__ 1 // CHECK_SKL_M64: #define __MPX__ 1 // CHECK_SKL_M64: #define __PCLMUL__ 1 // CHECK_SKL_M64: #define __POPCNT__ 1 @@ -735,6 +741,7 @@ // CHECK_KNL_M32: #define __FMA__ 1 // CHECK_KNL_M32: #define __LZCNT__ 1 // CHECK_KNL_M32: #define __MMX__ 1 +// CHECK_KNL_M32: #define __MOVBE__ 1 // CHECK_KNL_M32: #define __PCLMUL__ 1 // CHECK_KNL_M32: #define __POPCNT__ 1 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1 @@ -772,6 +779,7 @@ // CHECK_KNL_M64: #define __FMA__ 1
[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.
This revision was automatically updated to reflect the committed changes. Closed by commit rC343343: [X86] Add the movbe instruction intrinsics from icc. (authored by ctopper, committed by ). Repository: rC Clang https://reviews.llvm.org/D52586 Files: lib/Basic/Targets/X86.cpp lib/Headers/immintrin.h test/CodeGen/movbe-builtins.c test/Preprocessor/predefined-arch-macros.c Index: test/CodeGen/movbe-builtins.c === --- test/CodeGen/movbe-builtins.c +++ test/CodeGen/movbe-builtins.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64 +// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s + + +#include + +short test_loadbe_i16(const short *P) { + // CHECK-LABEL: @test_loadbe_i16 + // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1 + // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]]) + return _loadbe_i16(P); +} + +void test_storebe_i16(short *P, short D) { + // CHECK-LABEL: @test_storebe_i16 + // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}}) + // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1 + _storebe_i16(P, D); +} + +int test_loadbe_i32(const int *P) { + // CHECK-LABEL: @test_loadbe_i32 + // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1 + // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]]) + return _loadbe_i32(P); +} + +void test_storebe_i32(int *P, int D) { + // CHECK-LABEL: @test_storebe_i32 + // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}}) + // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1 + _storebe_i32(P, D); +} + +#ifdef __x86_64__ +long long test_loadbe_i64(const long long *P) { + // CHECK-X64-LABEL: @test_loadbe_i64 + // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1 + // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]]) + return _loadbe_i64(P); +} + +void test_storebe_i64(long long *P, long long D) { + // CHECK-X64-LABEL: @test_storebe_i64 + // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}}) + // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1 + _storebe_i64(P, D); +} +#endif Index: test/Preprocessor/predefined-arch-macros.c === --- test/Preprocessor/predefined-arch-macros.c +++ test/Preprocessor/predefined-arch-macros.c @@ -524,6 +524,7 @@ // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M32: #define __MMX__ 1 +// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1 @@ -554,6 +555,7 @@ // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M64: #define __MMX__ 1 +// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1 @@ -588,6 +590,7 @@ // CHECK_BROADWELL_M32: #define __INVPCID__ 1 // CHECK_BROADWELL_M32: #define __LZCNT__ 1 // CHECK_BROADWELL_M32: #define __MMX__ 1 +// CHECK_BROADWELL_M32: #define __MOVBE__ 1 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1 // CHECK_BROADWELL_M32: #define __POPCNT__ 1 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1 @@ -621,6 +624,7 @@ // CHECK_BROADWELL_M64: #define __INVPCID__ 1 // CHECK_BROADWELL_M64: #define __LZCNT__ 1 // CHECK_BROADWELL_M64: #define __MMX__ 1 +// CHECK_BROADWELL_M64: #define __MOVBE__ 1 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1 // CHECK_BROADWELL_M64: #define __POPCNT__ 1 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1 @@ -659,6 +663,7 @@ // CHECK_SKL_M32: #define __INVPCID__ 1 // CHECK_SKL_M32: #define __LZCNT__ 1 // CHECK_SKL_M32: #define __MMX__ 1 +// CHECK_SKL_M32: #define __MOVBE__ 1 // CHECK_SKL_M32: #define __MPX__ 1 // CHECK_SKL_M32: #define __PCLMUL__ 1 // CHECK_SKL_M32: #define __POPCNT__ 1 @@ -694,6 +699,7 @@ // CHECK_SKL_M64: #define __INVPCID__ 1 // CHECK_SKL_M64: #define __LZCNT__ 1 // CHECK_SKL_M64: #define __MMX__ 1 +// CHECK_SKL_M64: #define __MOVBE__ 1 // CHECK_SKL_M64: #define __MPX__ 1 // CHECK_SKL_M64: #define __PCLMUL__ 1 // CHECK_SKL_M64: #define __POPCNT__ 1 @@ -735,6 +741,7 @@ // CHECK_KNL_M32: #define __FMA__ 1 // CHECK_KNL_M32: #define __LZCNT__ 1 // CHECK_KNL_M32: #define __MMX__ 1 +// CHECK_KNL_M32: #define __MOVBE__ 1 // CHECK_KNL_M32: #define __PCLMUL__ 1 // CHECK_KNL_M32: #define __POPCNT__ 1 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1 @@ -772,6 +779,7 @@ // CHECK_KNL_M64: #define __FMA__ 1 // CHECK_KNL_M64: #define __LZCNT__ 1 // CHECK_KNL_M64: #define __MMX__ 1 +// CHECK_KNL_M64: #define __MOVBE__ 1 // CHECK_KNL_M64: #define __PCLMUL__ 1 // CHECK_KNL_M64: #define __POPCNT__ 1 // CHECK_KNL_M64: #define
[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.
spatel accepted this revision. spatel added a comment. This revision is now accepted and ready to land. LGTM https://reviews.llvm.org/D52586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.
craig.topper updated this revision to Diff 167416. craig.topper added a comment. Add comment. Fix typo. Add preprocessor define checks to the various CPUs that have MOVBE https://reviews.llvm.org/D52586 Files: lib/Basic/Targets/X86.cpp lib/Headers/immintrin.h test/CodeGen/movbe-builtins.c test/Preprocessor/predefined-arch-macros.c Index: test/Preprocessor/predefined-arch-macros.c === --- test/Preprocessor/predefined-arch-macros.c +++ test/Preprocessor/predefined-arch-macros.c @@ -524,6 +524,7 @@ // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M32: #define __MMX__ 1 +// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1 @@ -554,6 +555,7 @@ // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M64: #define __MMX__ 1 +// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1 @@ -588,6 +590,7 @@ // CHECK_BROADWELL_M32: #define __INVPCID__ 1 // CHECK_BROADWELL_M32: #define __LZCNT__ 1 // CHECK_BROADWELL_M32: #define __MMX__ 1 +// CHECK_BROADWELL_M32: #define __MOVBE__ 1 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1 // CHECK_BROADWELL_M32: #define __POPCNT__ 1 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1 @@ -621,6 +624,7 @@ // CHECK_BROADWELL_M64: #define __INVPCID__ 1 // CHECK_BROADWELL_M64: #define __LZCNT__ 1 // CHECK_BROADWELL_M64: #define __MMX__ 1 +// CHECK_BROADWELL_M64: #define __MOVBE__ 1 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1 // CHECK_BROADWELL_M64: #define __POPCNT__ 1 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1 @@ -659,6 +663,7 @@ // CHECK_SKL_M32: #define __INVPCID__ 1 // CHECK_SKL_M32: #define __LZCNT__ 1 // CHECK_SKL_M32: #define __MMX__ 1 +// CHECK_SKL_M32: #define __MOVBE__ 1 // CHECK_SKL_M32: #define __MPX__ 1 // CHECK_SKL_M32: #define __PCLMUL__ 1 // CHECK_SKL_M32: #define __POPCNT__ 1 @@ -694,6 +699,7 @@ // CHECK_SKL_M64: #define __INVPCID__ 1 // CHECK_SKL_M64: #define __LZCNT__ 1 // CHECK_SKL_M64: #define __MMX__ 1 +// CHECK_SKL_M64: #define __MOVBE__ 1 // CHECK_SKL_M64: #define __MPX__ 1 // CHECK_SKL_M64: #define __PCLMUL__ 1 // CHECK_SKL_M64: #define __POPCNT__ 1 @@ -735,6 +741,7 @@ // CHECK_KNL_M32: #define __FMA__ 1 // CHECK_KNL_M32: #define __LZCNT__ 1 // CHECK_KNL_M32: #define __MMX__ 1 +// CHECK_KNL_M32: #define __MOVBE__ 1 // CHECK_KNL_M32: #define __PCLMUL__ 1 // CHECK_KNL_M32: #define __POPCNT__ 1 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1 @@ -772,6 +779,7 @@ // CHECK_KNL_M64: #define __FMA__ 1 // CHECK_KNL_M64: #define __LZCNT__ 1 // CHECK_KNL_M64: #define __MMX__ 1 +// CHECK_KNL_M64: #define __MOVBE__ 1 // CHECK_KNL_M64: #define __PCLMUL__ 1 // CHECK_KNL_M64: #define __POPCNT__ 1 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1 @@ -813,6 +821,7 @@ // CHECK_KNM_M32: #define __FMA__ 1 // CHECK_KNM_M32: #define __LZCNT__ 1 // CHECK_KNM_M32: #define __MMX__ 1 +// CHECK_KNM_M32: #define __MOVBE__ 1 // CHECK_KNM_M32: #define __PCLMUL__ 1 // CHECK_KNM_M32: #define __POPCNT__ 1 // CHECK_KNM_M32: #define __PREFETCHWT1__ 1 @@ -848,6 +857,7 @@ // CHECK_KNM_M64: #define __FMA__ 1 // CHECK_KNM_M64: #define __LZCNT__ 1 // CHECK_KNM_M64: #define __MMX__ 1 +// CHECK_KNM_M64: #define __MOVBE__ 1 // CHECK_KNM_M64: #define __PCLMUL__ 1 // CHECK_KNM_M64: #define __POPCNT__ 1 // CHECK_KNM_M64: #define __PREFETCHWT1__ 1 @@ -889,6 +899,7 @@ // CHECK_SKX_M32: #define __INVPCID__ 1 // CHECK_SKX_M32: #define __LZCNT__ 1 // CHECK_SKX_M32: #define __MMX__ 1 +// CHECK_SKX_M32: #define __MOVBE__ 1 // CHECK_SKX_M32: #define __MPX__ 1 // CHECK_SKX_M32: #define __PCLMUL__ 1 // CHECK_SKX_M32: #define __PKU__ 1 @@ -935,6 +946,7 @@ // CHECK_SKX_M64: #define __INVPCID__ 1 // CHECK_SKX_M64: #define __LZCNT__ 1 // CHECK_SKX_M64: #define __MMX__ 1 +// CHECK_SKX_M64: #define __MOVBE__ 1 // CHECK_SKX_M64: #define __MPX__ 1 // CHECK_SKX_M64: #define __PCLMUL__ 1 // CHECK_SKX_M64: #define __PKU__ 1 @@ -986,6 +998,7 @@ // CHECK_CNL_M32: #define __INVPCID__ 1 // CHECK_CNL_M32: #define __LZCNT__ 1 // CHECK_CNL_M32: #define __MMX__ 1 +// CHECK_CNL_M32: #define __MOVBE__ 1 // CHECK_CNL_M32: #define __MPX__ 1 // CHECK_CNL_M32: #define __PCLMUL__ 1 // CHECK_CNL_M32: #define __PKU__ 1 @@ -1035,6 +1048,7 @@ // CHECK_CNL_M64: #define __INVPCID__ 1 // CHECK_CNL_M64: #define __LZCNT__ 1 // CHECK_CNL_M64: #define __MMX__ 1 +// CHECK_CNL_M64: #define __MOVBE__ 1 // CHECK_CNL_M64: #define __MPX__ 1 // CHECK_CNL_M64: #define __PCLMUL__ 1 // CHECK_CNL_M64: #define __PKU__ 1 @@ -1090,6 +1104,7 @@ // CHECK_ICL_M32: #define __INVPCID__ 1 // CHECK_ICL_M32: #define __LZCNT__ 1 // CHECK_ICL_M32: #define __MMX__ 1 +//
[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.
spatel added a comment. The struct hack isn't obvious to me. Without that, we would produce a load with default alignment based on the size of the load (i132 -> align 4, etc)? But we want to force align 1 regardless of the load size, so the __packed__ attribute on the struct gets us that IIUC. What does __may_alias__ do? Explain this in a code comment in the header to make this less tricky? Comment at: lib/Headers/immintrin.h:359 +#endif +#endif /* __MOVEBE */ + MOVEBE -> MOVBE https://reviews.llvm.org/D52586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52586: [X86] Add the movbe instruction intrinsics from icc.
craig.topper created this revision. craig.topper added reviewers: spatel, RKSimon. These intrinsics exist in icc. They can be found on the Intel Intrinsics Guide website. All the backend support is in place to pattern match a load+bswap or a bswap+store pattern to the MOVBE instructions. So we just need to get the frontend to emit the correct IR. The pointer arguments in icc are declared as void so I had to jump through a packed struct to forcing a specific alignment on the load/store. Same trick we use in the unaligned vector load/store intrinsics https://reviews.llvm.org/D52586 Files: lib/Basic/Targets/X86.cpp lib/Headers/immintrin.h test/CodeGen/movbe-builtins.c Index: test/CodeGen/movbe-builtins.c === --- /dev/null +++ test/CodeGen/movbe-builtins.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64 +// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s + + +#include + +short test_loadbe_i16(const short *P) { + // CHECK-LABEL: @test_loadbe_i16 + // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1 + // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]]) + return _loadbe_i16(P); +} + +void test_storebe_i16(short *P, short D) { + // CHECK-LABEL: @test_storebe_i16 + // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}}) + // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1 + _storebe_i16(P, D); +} + +int test_loadbe_i32(const int *P) { + // CHECK-LABEL: @test_loadbe_i32 + // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1 + // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]]) + return _loadbe_i32(P); +} + +void test_storebe_i32(int *P, int D) { + // CHECK-LABEL: @test_storebe_i32 + // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}}) + // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1 + _storebe_i32(P, D); +} + +#ifdef __x86_64__ +long long test_loadbe_i64(const long long *P) { + // CHECK-X64-LABEL: @test_loadbe_i64 + // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1 + // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]]) + return _loadbe_i64(P); +} + +void test_storebe_i64(long long *P, long long D) { + // CHECK-X64-LABEL: @test_storebe_i64 + // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}}) + // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1 + _storebe_i64(P, D); +} +#endif Index: lib/Headers/immintrin.h === --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -306,6 +306,58 @@ #endif #endif /* __FSGSBASE__ */ +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) +static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i16(void const * __P) { + struct __loadu_i16 { +short __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap16(((struct __loadu_i16*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i16(void * __P, short __D) { + struct __storeu_i16 { +short __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i32(void const * __P) { + struct __loadu_i32 { +int __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap32(((struct __loadu_i32*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i32(void * __P, int __D) { + struct __storeu_i32 { +int __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D); +} + +#ifdef __x86_64__ +static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i64(void const * __P) { + struct __loadu_i64 { +long long __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap64(((struct __loadu_i64*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i64(void * __P, long long __D) { + struct __storeu_i64 { +long long __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D); +} +#endif +#endif /* __MOVEBE */ + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) #include #include Index: lib/Basic/Targets/X86.cpp === --- lib/Basic/Targets/X86.cpp +++ lib/Basic/Targets/X86.cpp @@ -1081,6 +1081,9 @@ if (HasMWAITX) Builder.defineMacro("__MWAITX__"); + if (HasMOVBE) +