[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG4190d99dfcab: [X86] Add parentheses around casts in some of the X86 intrinsic headers. (authored by craig.topper). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D107843/new/ https://reviews.llvm.org/D107843 Files: clang/lib/Headers/__wmmintrin_aes.h clang/lib/Headers/avx2intrin.h clang/lib/Headers/avxintrin.h clang/lib/Headers/emmintrin.h clang/lib/Headers/smmintrin.h clang/lib/Headers/tmmintrin.h clang/lib/Headers/xmmintrin.h clang/test/CodeGen/X86/sse41-builtins.c Index: clang/test/CodeGen/X86/sse41-builtins.c === --- clang/test/CodeGen/X86/sse41-builtins.c +++ clang/test/CodeGen/X86/sse41-builtins.c @@ -393,3 +393,11 @@ // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_testz_si128(x, y); } + +// Make sure brackets work after macro intrinsics. +float pr51324(__m128 a) { + // CHECK-LABEL: pr51324 + // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 0) + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + return _mm_round_ps(a, 0)[0]; +} Index: clang/lib/Headers/xmmintrin.h === --- clang/lib/Headers/xmmintrin.h +++ clang/lib/Headers/xmmintrin.h @@ -2181,7 +2181,7 @@ ///3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ - (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n) + ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, ///and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2212,7 +2212,7 @@ /// \returns A 64-bit integer vector containing the copied packed data from the ///operands. 
#define _mm_insert_pi16(a, d, n) \ - (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n) + ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)) /// Compares each of the corresponding packed 16-bit integer values of ///the 64-bit integer vectors, and writes the greater value to the @@ -2359,7 +2359,7 @@ ///11: assigned from bits [63:48] of \a a. /// \returns A 64-bit integer vector containing the shuffled values. #define _mm_shuffle_pi16(a, n) \ - (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)) + ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) /// Conditionally copies the values from each 8-bit element in the first ///64-bit integer vector operand to the specified memory location, as @@ -2601,8 +2601,8 @@ ///11: Bits [127:96] copied from the specified operand. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ - (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ -(int)(mask)) + ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ + (int)(mask))) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of ///[4 x float] and interleaves them into a 128-bit vector of [4 x float]. Index: clang/lib/Headers/tmmintrin.h === --- clang/lib/Headers/tmmintrin.h +++ clang/lib/Headers/tmmintrin.h @@ -145,8 +145,8 @@ /// \returns A 128-bit integer vector containing the concatenated right-shifted ///value. #define _mm_alignr_epi8(a, b, n) \ - (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (n)) + ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (n))) /// Concatenates the two 64-bit integer vector operands, and right-shifts ///the result by the number of bytes specified in the immediate operand. @@ -168,7 +168,7 @@ /// \returns A 64-bit integer vector containing the concatenated right-shifted ///value. 
#define _mm_alignr_pi8(a, b, n) \ - (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)) + ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) /// Horizontally adds the adjacent pairs of values contained in 2 packed ///128-bit vectors of [8 x i16]. Index: clang/lib/Headers/smmintrin.h === --- clang/lib/Headers/smmintrin.h +++ clang/lib/Headers/smmintrin.h @@ -231,7 +231,7 @@ /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_round_ps(X, M) \ - (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)) +
[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
spatel added a comment. Probably want to address the other cleanups in another patch; the parens fixes and test LGTM. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D107843/new/ https://reviews.llvm.org/D107843 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
craig.topper updated this revision to Diff 366071. craig.topper added a comment. Add test case to the bottom of sse41-builtins.c Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D107843/new/ https://reviews.llvm.org/D107843 Files: clang/lib/Headers/__wmmintrin_aes.h clang/lib/Headers/avx2intrin.h clang/lib/Headers/avxintrin.h clang/lib/Headers/emmintrin.h clang/lib/Headers/smmintrin.h clang/lib/Headers/tmmintrin.h clang/lib/Headers/xmmintrin.h clang/test/CodeGen/X86/sse41-builtins.c Index: clang/test/CodeGen/X86/sse41-builtins.c === --- clang/test/CodeGen/X86/sse41-builtins.c +++ clang/test/CodeGen/X86/sse41-builtins.c @@ -393,3 +393,11 @@ // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_testz_si128(x, y); } + +// Make sure brackets work after macro intrinsics. +float pr51324(__m128 a) { + // CHECK-LABEL: pr51324 + // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 0) + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + return _mm_round_ps(a, 0)[0]; +} Index: clang/lib/Headers/xmmintrin.h === --- clang/lib/Headers/xmmintrin.h +++ clang/lib/Headers/xmmintrin.h @@ -2181,7 +2181,7 @@ ///3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ - (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n) + ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, ///and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2212,7 +2212,7 @@ /// \returns A 64-bit integer vector containing the copied packed data from the ///operands. 
#define _mm_insert_pi16(a, d, n) \ - (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n) + ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)) /// Compares each of the corresponding packed 16-bit integer values of ///the 64-bit integer vectors, and writes the greater value to the @@ -2359,7 +2359,7 @@ ///11: assigned from bits [63:48] of \a a. /// \returns A 64-bit integer vector containing the shuffled values. #define _mm_shuffle_pi16(a, n) \ - (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)) + ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) /// Conditionally copies the values from each 8-bit element in the first ///64-bit integer vector operand to the specified memory location, as @@ -2601,8 +2601,8 @@ ///11: Bits [127:96] copied from the specified operand. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ - (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ -(int)(mask)) + ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ + (int)(mask))) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of ///[4 x float] and interleaves them into a 128-bit vector of [4 x float]. Index: clang/lib/Headers/tmmintrin.h === --- clang/lib/Headers/tmmintrin.h +++ clang/lib/Headers/tmmintrin.h @@ -145,8 +145,8 @@ /// \returns A 128-bit integer vector containing the concatenated right-shifted ///value. #define _mm_alignr_epi8(a, b, n) \ - (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (n)) + ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (n))) /// Concatenates the two 64-bit integer vector operands, and right-shifts ///the result by the number of bytes specified in the immediate operand. @@ -168,7 +168,7 @@ /// \returns A 64-bit integer vector containing the concatenated right-shifted ///value. 
#define _mm_alignr_pi8(a, b, n) \ - (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)) + ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) /// Horizontally adds the adjacent pairs of values contained in 2 packed ///128-bit vectors of [8 x i16]. Index: clang/lib/Headers/smmintrin.h === --- clang/lib/Headers/smmintrin.h +++ clang/lib/Headers/smmintrin.h @@ -231,7 +231,7 @@ /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_round_ps(X, M) \ - (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)) + ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))) /// Copies three upper elements of the first 128-bit vector operand to ///the corresponding
[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
craig.topper updated this revision to Diff 365880. craig.topper added a comment. Fix two functions I missed. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D107843/new/ https://reviews.llvm.org/D107843 Files: clang/lib/Headers/__wmmintrin_aes.h clang/lib/Headers/avx2intrin.h clang/lib/Headers/avxintrin.h clang/lib/Headers/emmintrin.h clang/lib/Headers/smmintrin.h clang/lib/Headers/tmmintrin.h clang/lib/Headers/xmmintrin.h Index: clang/lib/Headers/xmmintrin.h === --- clang/lib/Headers/xmmintrin.h +++ clang/lib/Headers/xmmintrin.h @@ -2181,7 +2181,7 @@ ///3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ - (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n) + ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, ///and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2212,7 +2212,7 @@ /// \returns A 64-bit integer vector containing the copied packed data from the ///operands. #define _mm_insert_pi16(a, d, n) \ - (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n) + ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)) /// Compares each of the corresponding packed 16-bit integer values of ///the 64-bit integer vectors, and writes the greater value to the @@ -2359,7 +2359,7 @@ ///11: assigned from bits [63:48] of \a a. /// \returns A 64-bit integer vector containing the shuffled values. #define _mm_shuffle_pi16(a, n) \ - (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)) + ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) /// Conditionally copies the values from each 8-bit element in the first ///64-bit integer vector operand to the specified memory location, as @@ -2601,8 +2601,8 @@ ///11: Bits [127:96] copied from the specified operand. 
/// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ - (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ -(int)(mask)) + ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ + (int)(mask))) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of ///[4 x float] and interleaves them into a 128-bit vector of [4 x float]. Index: clang/lib/Headers/tmmintrin.h === --- clang/lib/Headers/tmmintrin.h +++ clang/lib/Headers/tmmintrin.h @@ -145,8 +145,8 @@ /// \returns A 128-bit integer vector containing the concatenated right-shifted ///value. #define _mm_alignr_epi8(a, b, n) \ - (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (n)) + ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (n))) /// Concatenates the two 64-bit integer vector operands, and right-shifts ///the result by the number of bytes specified in the immediate operand. @@ -168,7 +168,7 @@ /// \returns A 64-bit integer vector containing the concatenated right-shifted ///value. #define _mm_alignr_pi8(a, b, n) \ - (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)) + ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) /// Horizontally adds the adjacent pairs of values contained in 2 packed ///128-bit vectors of [8 x i16]. Index: clang/lib/Headers/smmintrin.h === --- clang/lib/Headers/smmintrin.h +++ clang/lib/Headers/smmintrin.h @@ -231,7 +231,7 @@ /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the rounded values. 
#define _mm_round_ps(X, M) \ - (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)) + ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))) /// Copies three upper elements of the first 128-bit vector operand to ///the corresponding three upper elements of the 128-bit result vector of @@ -272,8 +272,8 @@ /// \returns A 128-bit vector of [4 x float] containing the copied and rounded ///values. #define _mm_round_ss(X, Y, M) \ - (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (M)) + ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (M))) /// Rounds each element of the 128-bit vector of [2 x double] to an ///integer value according to the rounding control specified by the second @@ -306,7 +306,7 @@ /// 11: Truncated /// \returns A 128-bit vector of [2 x double]
[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
rsmith added a comment. I found a few more macro hygiene issues in these headers. Comment at: clang/lib/Headers/avx2intrin.h:23 #define _mm256_mpsadbw_epu8(X, Y, M) \ (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \ (__v32qi)(__m256i)(Y), (int)(M)) Parens missing here still Comment at: clang/lib/Headers/avx2intrin.h:1094 #define _mm256_i64gather_ps(m, i, s) \ (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ Parens missing here still. Comment at: clang/lib/Headers/smmintrin.h:868-871 #define _mm_extract_ps(X, N) (__extension__ \ ({ union { int __i; float __f; } __t; \ __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \ __t.__i;})) This is gross. I wonder if we can use `__builtin_bit_cast` here instead of a cast through a union. Comment at: clang/lib/Headers/smmintrin.h:876 #define _MM_EXTRACT_FLOAT(D, X, N) \ { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); } The existing code is the wrong way to define a statement-like macro, but I can't find any documentation (anywhere!) for `_MM_EXTRACT_FLOAT` to confirm what the expected valid uses are. The existing formulation would not work in contexts such as: ``` if (1) _MM_EXTRACT_FLOAT(d, x, n); else ... ``` ... because the semicolon would terminate the `if`, and it would incorrectly work in contexts requiring braces such as: ``` void f(float *pd, __m128 x, int n) _MM_EXTRACT_FLOAT(*pd, x, n) ``` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D107843/new/ https://reviews.llvm.org/D107843 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
pengfei accepted this revision. pengfei added a comment. This revision is now accepted and ready to land. Thanks Craig. I haven't found time to work on it. I think this is a good start. LGTM. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D107843/new/ https://reviews.llvm.org/D107843 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
spatel added a comment. Hard to see through all of the lint noise, but seems like a mechanical fix. Can we add a test like the one in the bug report? https://godbolt.org/z/sPT8e9vx9 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D107843/new/ https://reviews.llvm.org/D107843 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
craig.topper created this revision. craig.topper added reviewers: RKSimon, spatel, pengfei. craig.topper requested review of this revision. Herald added a project: clang. This covers the SSE and AVX/AVX2 headers. AVX512 has a lot more macros due to rounding mode. Fixes part of PR51324. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D107843 Files: clang/lib/Headers/__wmmintrin_aes.h clang/lib/Headers/avx2intrin.h clang/lib/Headers/avxintrin.h clang/lib/Headers/emmintrin.h clang/lib/Headers/smmintrin.h clang/lib/Headers/tmmintrin.h clang/lib/Headers/xmmintrin.h Index: clang/lib/Headers/xmmintrin.h === --- clang/lib/Headers/xmmintrin.h +++ clang/lib/Headers/xmmintrin.h @@ -2181,7 +2181,7 @@ ///3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ - (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n) + ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, ///and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2212,7 +2212,7 @@ /// \returns A 64-bit integer vector containing the copied packed data from the ///operands. #define _mm_insert_pi16(a, d, n) \ - (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n) + ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)) /// Compares each of the corresponding packed 16-bit integer values of ///the 64-bit integer vectors, and writes the greater value to the @@ -2359,7 +2359,7 @@ ///11: assigned from bits [63:48] of \a a. /// \returns A 64-bit integer vector containing the shuffled values. 
#define _mm_shuffle_pi16(a, n) \ - (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)) + ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) /// Conditionally copies the values from each 8-bit element in the first ///64-bit integer vector operand to the specified memory location, as @@ -2601,8 +2601,8 @@ ///11: Bits [127:96] copied from the specified operand. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ - (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ -(int)(mask)) + ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ + (int)(mask))) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of ///[4 x float] and interleaves them into a 128-bit vector of [4 x float]. Index: clang/lib/Headers/tmmintrin.h === --- clang/lib/Headers/tmmintrin.h +++ clang/lib/Headers/tmmintrin.h @@ -145,8 +145,8 @@ /// \returns A 128-bit integer vector containing the concatenated right-shifted ///value. #define _mm_alignr_epi8(a, b, n) \ - (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (n)) + ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (n))) /// Concatenates the two 64-bit integer vector operands, and right-shifts ///the result by the number of bytes specified in the immediate operand. @@ -168,7 +168,7 @@ /// \returns A 64-bit integer vector containing the concatenated right-shifted ///value. #define _mm_alignr_pi8(a, b, n) \ - (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)) + ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) /// Horizontally adds the adjacent pairs of values contained in 2 packed ///128-bit vectors of [8 x i16]. 
Index: clang/lib/Headers/smmintrin.h === --- clang/lib/Headers/smmintrin.h +++ clang/lib/Headers/smmintrin.h @@ -231,7 +231,7 @@ /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_round_ps(X, M) \ - (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)) + ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))) /// Copies three upper elements of the first 128-bit vector operand to ///the corresponding three upper elements of the 128-bit result vector of @@ -272,8 +272,8 @@ /// \returns A 128-bit vector of [4 x float] containing the copied and rounded ///values. #define _mm_round_ss(X, Y, M) \ - (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (M)) + ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (M))) /// Rounds each element of the 128-bit vector of [2 x double] to an ///integer value according to the rounding control