[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/RKSimon approved this pull request. LGTM - cheers https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
RKSimon wrote: I meant the patch title - I'm never quite sure how the title/description appears when these are squashed+merged https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
FreddyLeaf wrote: > LGTM - please update the description to mention that you're updating the > movnti i32/i64 scalar integer nt ops as well as the vector ops I think the description already covers this? It may display incorrectly in the preview, so it is highlighted here: `For *_stream_* series intrinsics` https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf edited https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/66310 >From 21157a0e3b4c4e4e2430752ef806148685a942a2 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 14 Sep 2023 09:17:39 +0800 Subject: [PATCH 1/4] [X86] Align 128/256 variants to use void * as 512 variants. For *_stream_* series intrinsics. --- clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avxintrin.h | 6 +++--- clang/lib/Headers/emmintrin.h | 8 clang/lib/Headers/smmintrin.h | 2 +- clang/lib/Headers/xmmintrin.h | 2 +- clang/test/CodeGen/X86/avx-builtins.c | 18 ++ clang/test/CodeGen/X86/avx2-builtins.c | 6 ++ clang/test/CodeGen/X86/sse-builtins.c | 6 ++ clang/test/CodeGen/X86/sse2-builtins.c | 24 clang/test/CodeGen/X86/sse41-builtins.c | 6 ++ 10 files changed, 70 insertions(+), 10 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c45006193eddcc9..675a93bba1c8a4f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) ///A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 94fac5e6c9da471..b796bb773ec11f0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b ///A 256-bit integer vector containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b ///A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a ///A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 064d974936598f8..eacb0182614304d 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, ///A pointer to the 128-bit aligned memory location used to store the value. /// \param __a ///A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, ///A pointer to the 128-bit aligned memory location used to store the value. 
/// \param __a ///A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,7 +3983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, ///A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si32(int *__p, int __a) { +_mm_stream_si32(void *__p, int __a) { __builtin_ia32_movnti(__p, __a); } @@ -4003,7 +4003,7 @@ static __inline__ void ///A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si64(long long *__p,
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
RKSimon wrote: They're AMD specific so probably not covered https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
FreddyLeaf wrote: > For completeness the _mm_stream_sd / _mm_stream_ss SSE4A intrinsics still > need updating as well: > https://github.com/llvm/llvm-project/blob/59fbba94908f65eedb8bdd619e425bf97d84b2e3/clang/lib/Headers/ammintrin.h#L158C1-L158C14 Good catch. It seems the intrinsics guide missed these two. https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
RKSimon wrote: For completeness the _mm_stream_sd / _mm_stream_ss SSE4A intrinsics still need updating as well: https://github.com/llvm/llvm-project/blob/59fbba94908f65eedb8bdd619e425bf97d84b2e3/clang/lib/Headers/ammintrin.h#L158C1-L158C14 https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf edited https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/66310 >From 21157a0e3b4c4e4e2430752ef806148685a942a2 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 14 Sep 2023 09:17:39 +0800 Subject: [PATCH 1/3] [X86] Align 128/256 variants to use void * as 512 variants. For *_stream_* series intrinsics. --- clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avxintrin.h | 6 +++--- clang/lib/Headers/emmintrin.h | 8 clang/lib/Headers/smmintrin.h | 2 +- clang/lib/Headers/xmmintrin.h | 2 +- clang/test/CodeGen/X86/avx-builtins.c | 18 ++ clang/test/CodeGen/X86/avx2-builtins.c | 6 ++ clang/test/CodeGen/X86/sse-builtins.c | 6 ++ clang/test/CodeGen/X86/sse2-builtins.c | 24 clang/test/CodeGen/X86/sse41-builtins.c | 6 ++ 10 files changed, 70 insertions(+), 10 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c45006193eddcc9..675a93bba1c8a4f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) ///A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 94fac5e6c9da471..b796bb773ec11f0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b ///A 256-bit integer vector containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b ///A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a ///A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 064d974936598f8..eacb0182614304d 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, ///A pointer to the 128-bit aligned memory location used to store the value. /// \param __a ///A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, ///A pointer to the 128-bit aligned memory location used to store the value. 
/// \param __a ///A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,7 +3983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, ///A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si32(int *__p, int __a) { +_mm_stream_si32(void *__p, int __a) { __builtin_ia32_movnti(__p, __a); } @@ -4003,7 +4003,7 @@ static __inline__ void ///A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si64(long long *__p,
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/KanRobert approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/RKSimon approved this pull request. LGTM - please update the description to mention that you're updating the movnti i32/i64 scalar integer nt ops as well as the vector ops https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf resolved https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -358,6 +358,12 @@ __m128i test_mm_stream_load_si128(__m128i const *a) { return _mm_stream_load_si128(a); } +__m128i test_mm_stream_load_si128_void(void const *a) { FreddyLeaf wrote: 3e463d3c41bb8e8391c62dd95910d009b4b39b7d https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf resolved https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, /// \returns A 128-bit integer vector containing the data stored at the ///specified memory location. static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_stream_load_si128(__m128i const *__V) { +_mm_stream_load_si128(void const *__V) { FreddyLeaf wrote: 3e463d3c41bb8e8391c62dd95910d009b4b39b7d https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf resolved https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) ///A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) FreddyLeaf wrote: 3e463d3c41bb8e8391c62dd95910d009b4b39b7d https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf resolved https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf edited https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -1223,6 +1223,12 @@ __m256i test_mm256_stream_load_si256(__m256i const *a) { return _mm256_stream_load_si256(a); } +__m256i test_mm256_stream_load_si256_const(void const *a) { FreddyLeaf wrote: 3e463d3 https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/66310 >From 21157a0e3b4c4e4e2430752ef806148685a942a2 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 14 Sep 2023 09:17:39 +0800 Subject: [PATCH 1/2] [X86] Align 128/256 variants to use void * as 512 variants. For *_stream_* series intrinsics. --- clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avxintrin.h | 6 +++--- clang/lib/Headers/emmintrin.h | 8 clang/lib/Headers/smmintrin.h | 2 +- clang/lib/Headers/xmmintrin.h | 2 +- clang/test/CodeGen/X86/avx-builtins.c | 18 ++ clang/test/CodeGen/X86/avx2-builtins.c | 6 ++ clang/test/CodeGen/X86/sse-builtins.c | 6 ++ clang/test/CodeGen/X86/sse2-builtins.c | 24 clang/test/CodeGen/X86/sse41-builtins.c | 6 ++ 10 files changed, 70 insertions(+), 10 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c45006193eddcc9..675a93bba1c8a4f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) ///A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 94fac5e6c9da471..b796bb773ec11f0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b ///A 256-bit integer vector containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b ///A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a ///A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 064d974936598f8..eacb0182614304d 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, ///A pointer to the 128-bit aligned memory location used to store the value. /// \param __a ///A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, ///A pointer to the 128-bit aligned memory location used to store the value. 
/// \param __a ///A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,7 +3983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, ///A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si32(int *__p, int __a) { +_mm_stream_si32(void *__p, int __a) { __builtin_ia32_movnti(__p, __a); } @@ -4003,7 +4003,7 @@ static __inline__ void ///A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si64(long long *__p,
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -1223,6 +1223,12 @@ __m256i test_mm256_stream_load_si256(__m256i const *a) { return _mm256_stream_load_si256(a); } +__m256i test_mm256_stream_load_si256_const(void const *a) { KanRobert wrote: +1 https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -358,6 +358,12 @@ __m128i test_mm_stream_load_si128(__m128i const *a) { return _mm_stream_load_si128(a); } +__m128i test_mm_stream_load_si128_void(void const *a) { phoebewang wrote: `const void`? https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) ///A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) phoebewang wrote: Move `const` first? https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
@@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, /// \returns A 128-bit integer vector containing the data stored at the ///specified memory location. static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_stream_load_si128(__m128i const *__V) { +_mm_stream_load_si128(void const *__V) { phoebewang wrote: Move `const` first? https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/phoebewang edited https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/phoebewang approved this pull request. LGTM. https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf review_requested https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
FreddyLeaf wrote: Here's the earlier change that did the same for the 512 variants: https://reviews.llvm.org/D66786 https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
llvmbot wrote: @llvm/pr-subscribers-clang Changes For *_stream_* series intrinsics. -- Full diff: https://github.com/llvm/llvm-project/pull/66310.diff 10 Files Affected: - (modified) clang/lib/Headers/avx2intrin.h (+1-1) - (modified) clang/lib/Headers/avxintrin.h (+3-3) - (modified) clang/lib/Headers/emmintrin.h (+4-4) - (modified) clang/lib/Headers/smmintrin.h (+1-1) - (modified) clang/lib/Headers/xmmintrin.h (+1-1) - (modified) clang/test/CodeGen/X86/avx-builtins.c (+18) - (modified) clang/test/CodeGen/X86/avx2-builtins.c (+6) - (modified) clang/test/CodeGen/X86/sse-builtins.c (+6) - (modified) clang/test/CodeGen/X86/sse2-builtins.c (+24) - (modified) clang/test/CodeGen/X86/sse41-builtins.c (+6) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c45006193eddcc9..675a93bba1c8a4f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) ///A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 94fac5e6c9da471..b796bb773ec11f0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b ///A 256-bit integer vector containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b ///A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a ///A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 064d974936598f8..eacb0182614304d 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, ///A pointer to the 128-bit aligned memory location used to store the value. /// \param __a ///A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, ///A pointer to the 128-bit aligned memory location used to store the value. 
/// \param __a ///A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,7 +3983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, ///A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si32(int *__p, int __a) { +_mm_stream_si32(void *__p, int __a) { __builtin_ia32_movnti(__p, __a); } @@ -4003,7 +4003,7 @@ static __inline__ void ///A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si64(long long *__p, long long __a) { +_mm_stream_si64(void *__p, long long __a) { __builtin_ia32_movnti64(__p, __a); } #endif diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 1
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf review_requested https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf review_requested https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf review_requested https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/llvmbot labeled https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf review_requested https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/llvmbot labeled https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/llvmbot labeled https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf review_requested https://github.com/llvm/llvm-project/pull/66310 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [X86] Align 128/256 variants to use void * as 512 variants. (PR #66310)
https://github.com/FreddyLeaf created https://github.com/llvm/llvm-project/pull/66310: For *_stream_* series intrinsics. >From 21157a0e3b4c4e4e2430752ef806148685a942a2 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 14 Sep 2023 09:17:39 +0800 Subject: [PATCH] [X86] Align 128/256 variants to use void * as 512 variants. For *_stream_* series intrinsics. --- clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avxintrin.h | 6 +++--- clang/lib/Headers/emmintrin.h | 8 clang/lib/Headers/smmintrin.h | 2 +- clang/lib/Headers/xmmintrin.h | 2 +- clang/test/CodeGen/X86/avx-builtins.c | 18 ++ clang/test/CodeGen/X86/avx2-builtins.c | 6 ++ clang/test/CodeGen/X86/sse-builtins.c | 6 ++ clang/test/CodeGen/X86/sse2-builtins.c | 24 clang/test/CodeGen/X86/sse41-builtins.c | 6 ++ 10 files changed, 70 insertions(+), 10 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c45006193eddcc9..675a93bba1c8a4f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) ///A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 94fac5e6c9da471..b796bb773ec11f0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b ///A 256-bit integer vector containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b ///A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a ///A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 064d974936598f8..eacb0182614304d 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, ///A pointer to the 128-bit aligned memory location used to store the value. /// \param __a ///A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, ///A pointer to the 128-bit aligned memory location used to store the value. 
/// \param __a ///A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,7 +3983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, ///A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -_mm_stream_si32(int *__p, int __a) { +_mm_stream_si32(void *__p, int __a) { __builtin_ia32_movnti(__p, __a); } @@ -4003,7 +4003,7 @@ static __inline__ void ///A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -