Hi spatel,
Clang counterpart to D10555:
While working on PR23464, the broadcast intrinsics annoyed me. Let's remove
them: it's one of the simplest shuffle kind, IR is good enough
http://reviews.llvm.org/D10556
Files:
include/clang/Basic/BuiltinsX86.def
lib/Headers/avx2intrin.h
test/CodeGen/avx2-builtins.c
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -573,17 +573,6 @@
BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "")
BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "")
BUILTIN(__builtin_ia32_movntdqa256, "V4LLiV4LLi*", "")
-BUILTIN(__builtin_ia32_vbroadcastss_ps, "V4fV4f", "")
-BUILTIN(__builtin_ia32_vbroadcastss_ps256, "V8fV4f", "")
-BUILTIN(__builtin_ia32_vbroadcastsd_pd256, "V4dV2d", "")
-BUILTIN(__builtin_ia32_pbroadcastb256, "V32cV16c", "")
-BUILTIN(__builtin_ia32_pbroadcastw256, "V16sV8s", "")
-BUILTIN(__builtin_ia32_pbroadcastd256, "V8iV4i", "")
-BUILTIN(__builtin_ia32_pbroadcastq256, "V4LLiV2LLi", "")
-BUILTIN(__builtin_ia32_pbroadcastb128, "V16cV16c", "")
-BUILTIN(__builtin_ia32_pbroadcastw128, "V8sV8s", "")
-BUILTIN(__builtin_ia32_pbroadcastd128, "V4iV4i", "")
-BUILTIN(__builtin_ia32_pbroadcastq128, "V2LLiV2LLi", "")
BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "")
BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8f", "")
BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "")
Index: lib/Headers/avx2intrin.h
===================================================================
--- lib/Headers/avx2intrin.h
+++ lib/Headers/avx2intrin.h
@@ -758,9 +758,9 @@
}
static __inline__ __m128 DEFAULT_FN_ATTRS
-_mm_broadcastss_ps(__m128 __X)
+_mm_broadcastss_ps(__m128 __a)
{
- return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
+ return __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
}
static __inline__ __m128d DEFAULT_FN_ATTRS
@@ -770,21 +770,21 @@
}
static __inline__ __m256 DEFAULT_FN_ATTRS
-_mm256_broadcastss_ps(__m128 __X)
+_mm256_broadcastss_ps(__m128 __a)
{
- return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
+ return __builtin_shufflevector(__a, __a, 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256d DEFAULT_FN_ATTRS
-_mm256_broadcastsd_pd(__m128d __X)
+_mm256_broadcastsd_pd(__m128d __a)
{
- return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
+ return __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
}
static __inline__ __m256i DEFAULT_FN_ATTRS
-_mm256_broadcastsi128_si256(__m128i __X)
+_mm256_broadcastsi128_si256(__m128i __a)
{
- return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1);
+ return __builtin_shufflevector(__a, __a, 0, 1, 0, 1);
}
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
@@ -812,50 +812,60 @@
static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastb_epi8(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
+ return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastw_epi16(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
+ return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastd_epi32(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
+ return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X,
+ 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastq_epi64(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastq256(__X);
+ return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);
}
static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastb_epi8(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
+ return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastw_epi16(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
+ return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X,
+ 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastd_epi32(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
+ return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);
}
static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastq_epi64(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastq128(__X);
+ return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);
}
static __inline__ __m256i DEFAULT_FN_ATTRS
Index: test/CodeGen/avx2-builtins.c
===================================================================
--- test/CodeGen/avx2-builtins.c
+++ test/CodeGen/avx2-builtins.c
@@ -607,7 +607,9 @@
}
__m128 test_mm_broadcastss_ps(__m128 a) {
- // CHECK: @llvm.x86.avx2.vbroadcast.ss.ps
+ // CHECK-LABEL: test_mm_broadcastss_ps
+ // CHECK-NOT: @llvm.x86.avx2.vbroadcast.ss.ps
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
return _mm_broadcastss_ps(a);
}
@@ -617,12 +619,16 @@
}
__m256 test_mm256_broadcastss_ps(__m128 a) {
- // CHECK: @llvm.x86.avx2.vbroadcast.ss.ps.256
+ // CHECK-LABEL: test_mm256_broadcastss_ps
+ // CHECK-NOT: @llvm.x86.avx2.vbroadcast.ss.ps.256
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> zeroinitializer
return _mm256_broadcastss_ps(a);
}
__m256d test_mm256_broadcastsd_pd(__m128d a) {
- // check: @llvm.x86.avx2.vbroadcast.sd.pd.256
+ // CHECK-LABEL: test_mm256_broadcastsd_pd
+ // CHECK-NOT: @llvm.x86.avx2.vbroadcast.sd.pd.256
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer
return _mm256_broadcastsd_pd(a);
}
@@ -646,42 +652,58 @@
}
__m256i test_mm256_broadcastb_epi8(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastb.256
+ // CHECK-LABEL: test_mm256_broadcastb_epi8
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastb.256
+ // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <32 x i32> zeroinitializer
return _mm256_broadcastb_epi8(a);
}
__m256i test_mm256_broadcastw_epi16(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastw.256
+ // CHECK-LABEL: test_mm256_broadcastw_epi16
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastw.256
+ // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> zeroinitializer
return _mm256_broadcastw_epi16(a);
}
__m256i test_mm256_broadcastd_epi32(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastd.256
+ // CHECK-LABEL: test_mm256_broadcastd_epi32
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastd.256
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> zeroinitializer
return _mm256_broadcastd_epi32(a);
}
__m256i test_mm256_broadcastq_epi64(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastq.256
+ // CHECK-LABEL: test_mm256_broadcastq_epi64
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastq.256
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> zeroinitializer
return _mm256_broadcastq_epi64(a);
}
__m128i test_mm_broadcastb_epi8(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastb.128
+ // CHECK-LABEL: test_mm_broadcastb_epi8
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastb.128
+ // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> zeroinitializer
return _mm_broadcastb_epi8(a);
}
__m128i test_mm_broadcastw_epi16(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastw.128
+ // CHECK-LABEL: test_mm_broadcastw_epi16
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastw.128
+ // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> zeroinitializer
return _mm_broadcastw_epi16(a);
}
__m128i test_mm_broadcastd_epi32(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastd.128
+ // CHECK-LABEL: test_mm_broadcastd_epi32
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastd.128
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
return _mm_broadcastd_epi32(a);
}
__m128i test_mm_broadcastq_epi64(__m128i a) {
- // CHECK: @llvm.x86.avx2.pbroadcastq.128
+ // CHECK-LABEL: test_mm_broadcastq_epi64
+ // CHECK-NOT: @llvm.x86.avx2.pbroadcastq.128
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer
return _mm_broadcastq_epi64(a);
}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits