[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-13 Thread Craig Topper via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4190d99dfcab: [X86] Add parentheses around casts in some of 
the X86 intrinsic headers. (authored by craig.topper).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107843/new/

https://reviews.llvm.org/D107843

Files:
  clang/lib/Headers/__wmmintrin_aes.h
  clang/lib/Headers/avx2intrin.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/smmintrin.h
  clang/lib/Headers/tmmintrin.h
  clang/lib/Headers/xmmintrin.h
  clang/test/CodeGen/X86/sse41-builtins.c

Index: clang/test/CodeGen/X86/sse41-builtins.c
===
--- clang/test/CodeGen/X86/sse41-builtins.c
+++ clang/test/CodeGen/X86/sse41-builtins.c
@@ -393,3 +393,11 @@
   // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_testz_si128(x, y);
 }
+
+// Make sure brackets work after macro intrinsics.
+float pr51324(__m128 a) {
+  // CHECK-LABEL: pr51324
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 0)
+  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+  return _mm_round_ps(a, 0)[0];
+}
Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -2181,7 +2181,7 @@
 ///3: Bits [63:48] are copied to the destination.
 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
 #define _mm_extract_pi16(a, n) \
-  (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
+  ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
 
 /// Copies data from the 64-bit vector of [4 x i16] to the destination,
 ///and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2212,7 @@
 /// \returns A 64-bit integer vector containing the copied packed data from the
 ///operands.
 #define _mm_insert_pi16(a, d, n) \
-  (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
+  ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n))
 
 /// Compares each of the corresponding packed 16-bit integer values of
 ///the 64-bit integer vectors, and writes the greater value to the
@@ -2359,7 +2359,7 @@
 ///11: assigned from bits [63:48] of \a a.
 /// \returns A 64-bit integer vector containing the shuffled values.
 #define _mm_shuffle_pi16(a, n) \
-  (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))
+  ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
 
 /// Conditionally copies the values from each 8-bit element in the first
 ///64-bit integer vector operand to the specified memory location, as
@@ -2601,8 +2601,8 @@
 ///11: Bits [127:96] copied from the specified operand.
 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
 #define _mm_shuffle_ps(a, b, mask) \
-  (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
-(int)(mask))
+  ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask)))
 
 /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
 ///[4 x float] and interleaves them into a 128-bit vector of [4 x float].
Index: clang/lib/Headers/tmmintrin.h
===
--- clang/lib/Headers/tmmintrin.h
+++ clang/lib/Headers/tmmintrin.h
@@ -145,8 +145,8 @@
 /// \returns A 128-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_epi8(a, b, n) \
-  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (n))
+  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+  (__v16qi)(__m128i)(b), (n)))
 
 /// Concatenates the two 64-bit integer vector operands, and right-shifts
 ///the result by the number of bytes specified in the immediate operand.
@@ -168,7 +168,7 @@
 /// \returns A 64-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_pi8(a, b, n) \
-  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
+  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 ///128-bit vectors of [8 x i16].
Index: clang/lib/Headers/smmintrin.h
===
--- clang/lib/Headers/smmintrin.h
+++ clang/lib/Headers/smmintrin.h
@@ -231,7 +231,7 @@
 ///  11: Truncated
 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
 #define _mm_round_ps(X, M) \
-  (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
+  

[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-13 Thread Sanjay Patel via Phabricator via cfe-commits
spatel added a comment.

Probably want to address the other cleanups in another patch; the parens fixes 
and test LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107843/new/

https://reviews.llvm.org/D107843

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-12 Thread Craig Topper via Phabricator via cfe-commits
craig.topper updated this revision to Diff 366071.
craig.topper added a comment.

Add test case to the bottom of sse41-builtins.c


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107843/new/

https://reviews.llvm.org/D107843

Files:
  clang/lib/Headers/__wmmintrin_aes.h
  clang/lib/Headers/avx2intrin.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/smmintrin.h
  clang/lib/Headers/tmmintrin.h
  clang/lib/Headers/xmmintrin.h
  clang/test/CodeGen/X86/sse41-builtins.c

Index: clang/test/CodeGen/X86/sse41-builtins.c
===
--- clang/test/CodeGen/X86/sse41-builtins.c
+++ clang/test/CodeGen/X86/sse41-builtins.c
@@ -393,3 +393,11 @@
   // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_testz_si128(x, y);
 }
+
+// Make sure brackets work after macro intrinsics.
+float pr51324(__m128 a) {
+  // CHECK-LABEL: pr51324
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 0)
+  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+  return _mm_round_ps(a, 0)[0];
+}
Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -2181,7 +2181,7 @@
 ///3: Bits [63:48] are copied to the destination.
 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
 #define _mm_extract_pi16(a, n) \
-  (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
+  ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
 
 /// Copies data from the 64-bit vector of [4 x i16] to the destination,
 ///and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2212,7 @@
 /// \returns A 64-bit integer vector containing the copied packed data from the
 ///operands.
 #define _mm_insert_pi16(a, d, n) \
-  (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
+  ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n))
 
 /// Compares each of the corresponding packed 16-bit integer values of
 ///the 64-bit integer vectors, and writes the greater value to the
@@ -2359,7 +2359,7 @@
 ///11: assigned from bits [63:48] of \a a.
 /// \returns A 64-bit integer vector containing the shuffled values.
 #define _mm_shuffle_pi16(a, n) \
-  (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))
+  ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
 
 /// Conditionally copies the values from each 8-bit element in the first
 ///64-bit integer vector operand to the specified memory location, as
@@ -2601,8 +2601,8 @@
 ///11: Bits [127:96] copied from the specified operand.
 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
 #define _mm_shuffle_ps(a, b, mask) \
-  (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
-(int)(mask))
+  ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask)))
 
 /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
 ///[4 x float] and interleaves them into a 128-bit vector of [4 x float].
Index: clang/lib/Headers/tmmintrin.h
===
--- clang/lib/Headers/tmmintrin.h
+++ clang/lib/Headers/tmmintrin.h
@@ -145,8 +145,8 @@
 /// \returns A 128-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_epi8(a, b, n) \
-  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (n))
+  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+  (__v16qi)(__m128i)(b), (n)))
 
 /// Concatenates the two 64-bit integer vector operands, and right-shifts
 ///the result by the number of bytes specified in the immediate operand.
@@ -168,7 +168,7 @@
 /// \returns A 64-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_pi8(a, b, n) \
-  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
+  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 ///128-bit vectors of [8 x i16].
Index: clang/lib/Headers/smmintrin.h
===
--- clang/lib/Headers/smmintrin.h
+++ clang/lib/Headers/smmintrin.h
@@ -231,7 +231,7 @@
 ///  11: Truncated
 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
 #define _mm_round_ps(X, M) \
-  (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
+  ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)))
 
 /// Copies three upper elements of the first 128-bit vector operand to
 ///the corresponding 

[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-11 Thread Craig Topper via Phabricator via cfe-commits
craig.topper updated this revision to Diff 365880.
craig.topper added a comment.

Fix two functions I missed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107843/new/

https://reviews.llvm.org/D107843

Files:
  clang/lib/Headers/__wmmintrin_aes.h
  clang/lib/Headers/avx2intrin.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/smmintrin.h
  clang/lib/Headers/tmmintrin.h
  clang/lib/Headers/xmmintrin.h

Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -2181,7 +2181,7 @@
 ///3: Bits [63:48] are copied to the destination.
 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
 #define _mm_extract_pi16(a, n) \
-  (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
+  ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
 
 /// Copies data from the 64-bit vector of [4 x i16] to the destination,
 ///and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2212,7 @@
 /// \returns A 64-bit integer vector containing the copied packed data from the
 ///operands.
 #define _mm_insert_pi16(a, d, n) \
-  (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
+  ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n))
 
 /// Compares each of the corresponding packed 16-bit integer values of
 ///the 64-bit integer vectors, and writes the greater value to the
@@ -2359,7 +2359,7 @@
 ///11: assigned from bits [63:48] of \a a.
 /// \returns A 64-bit integer vector containing the shuffled values.
 #define _mm_shuffle_pi16(a, n) \
-  (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))
+  ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
 
 /// Conditionally copies the values from each 8-bit element in the first
 ///64-bit integer vector operand to the specified memory location, as
@@ -2601,8 +2601,8 @@
 ///11: Bits [127:96] copied from the specified operand.
 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
 #define _mm_shuffle_ps(a, b, mask) \
-  (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
-(int)(mask))
+  ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask)))
 
 /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
 ///[4 x float] and interleaves them into a 128-bit vector of [4 x float].
Index: clang/lib/Headers/tmmintrin.h
===
--- clang/lib/Headers/tmmintrin.h
+++ clang/lib/Headers/tmmintrin.h
@@ -145,8 +145,8 @@
 /// \returns A 128-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_epi8(a, b, n) \
-  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (n))
+  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+  (__v16qi)(__m128i)(b), (n)))
 
 /// Concatenates the two 64-bit integer vector operands, and right-shifts
 ///the result by the number of bytes specified in the immediate operand.
@@ -168,7 +168,7 @@
 /// \returns A 64-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_pi8(a, b, n) \
-  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
+  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 ///128-bit vectors of [8 x i16].
Index: clang/lib/Headers/smmintrin.h
===
--- clang/lib/Headers/smmintrin.h
+++ clang/lib/Headers/smmintrin.h
@@ -231,7 +231,7 @@
 ///  11: Truncated
 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
 #define _mm_round_ps(X, M) \
-  (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
+  ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)))
 
 /// Copies three upper elements of the first 128-bit vector operand to
 ///the corresponding three upper elements of the 128-bit result vector of
@@ -272,8 +272,8 @@
 /// \returns A 128-bit vector of [4 x float] containing the copied and rounded
 ///values.
 #define _mm_round_ss(X, Y, M) \
-  (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M))
+  ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+  (__v4sf)(__m128)(Y), (M)))
 
 /// Rounds each element of the 128-bit vector of [2 x double] to an
 ///integer value according to the rounding control specified by the second
@@ -306,7 +306,7 @@
 ///  11: Truncated
 /// \returns A 128-bit vector of [2 x double] 

[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-11 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added a comment.

I found a few more macro hygiene issues in these headers.




Comment at: clang/lib/Headers/avx2intrin.h:23
 #define _mm256_mpsadbw_epu8(X, Y, M) \
   (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
  (__v32qi)(__m256i)(Y), (int)(M))

Parens missing here still



Comment at: clang/lib/Headers/avx2intrin.h:1094
 #define _mm256_i64gather_ps(m, i, s) \
   (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
(float const *)(m), \

Parens missing here still.



Comment at: clang/lib/Headers/smmintrin.h:868-871
 #define _mm_extract_ps(X, N) (__extension__  \
   ({ union { int __i; float __f; } __t;  \
  __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \
  __t.__i;}))

This is gross. I wonder if we can use `__builtin_bit_cast` here instead of a 
cast through a union.




Comment at: clang/lib/Headers/smmintrin.h:876
 #define _MM_EXTRACT_FLOAT(D, X, N) \
   { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }
 

The existing code is the wrong way to define a statement-like macro, but I 
can't find any documentation (anywhere!) for `_MM_EXTRACT_FLOAT` to confirm 
what the expected valid uses are. The existing formulation would not work in 
contexts such as:

```
if (1)
  _MM_EXTRACT_FLOAT(d, x, n);
else
  ...
```

... because the semicolon would terminate the `if`, and it would incorrectly 
work in contexts requiring braces such as:

```
void f(float *pd, __m128 x, int n) _MM_EXTRACT_FLOAT(*pd, x, n)
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107843/new/

https://reviews.llvm.org/D107843

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-10 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

Thanks Craig. I haven't find time to work on it. I think this is a good start. 
LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107843/new/

https://reviews.llvm.org/D107843

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-10 Thread Sanjay Patel via Phabricator via cfe-commits
spatel added a comment.

Hard to see through all of the lint noise, but seems like a mechanical fix.
Can we add a test like the one in the bug report?
https://godbolt.org/z/sPT8e9vx9


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107843/new/

https://reviews.llvm.org/D107843

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107843: [X86] Add parentheses around casts in some of the X86 intrinsic headers.

2021-08-10 Thread Craig Topper via Phabricator via cfe-commits
craig.topper created this revision.
craig.topper added reviewers: RKSimon, spatel, pengfei.
craig.topper requested review of this revision.
Herald added a project: clang.

This covers the SSE and AVX/AVX2 headers. AVX512 has a lot more macros
due to rounding mode.

Fixes part of PR51324.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107843

Files:
  clang/lib/Headers/__wmmintrin_aes.h
  clang/lib/Headers/avx2intrin.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/smmintrin.h
  clang/lib/Headers/tmmintrin.h
  clang/lib/Headers/xmmintrin.h

Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -2181,7 +2181,7 @@
 ///3: Bits [63:48] are copied to the destination.
 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
 #define _mm_extract_pi16(a, n) \
-  (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
+  ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
 
 /// Copies data from the 64-bit vector of [4 x i16] to the destination,
 ///and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2212,7 @@
 /// \returns A 64-bit integer vector containing the copied packed data from the
 ///operands.
 #define _mm_insert_pi16(a, d, n) \
-  (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
+  ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n))
 
 /// Compares each of the corresponding packed 16-bit integer values of
 ///the 64-bit integer vectors, and writes the greater value to the
@@ -2359,7 +2359,7 @@
 ///11: assigned from bits [63:48] of \a a.
 /// \returns A 64-bit integer vector containing the shuffled values.
 #define _mm_shuffle_pi16(a, n) \
-  (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))
+  ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
 
 /// Conditionally copies the values from each 8-bit element in the first
 ///64-bit integer vector operand to the specified memory location, as
@@ -2601,8 +2601,8 @@
 ///11: Bits [127:96] copied from the specified operand.
 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
 #define _mm_shuffle_ps(a, b, mask) \
-  (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
-(int)(mask))
+  ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask)))
 
 /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
 ///[4 x float] and interleaves them into a 128-bit vector of [4 x float].
Index: clang/lib/Headers/tmmintrin.h
===
--- clang/lib/Headers/tmmintrin.h
+++ clang/lib/Headers/tmmintrin.h
@@ -145,8 +145,8 @@
 /// \returns A 128-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_epi8(a, b, n) \
-  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (n))
+  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+  (__v16qi)(__m128i)(b), (n)))
 
 /// Concatenates the two 64-bit integer vector operands, and right-shifts
 ///the result by the number of bytes specified in the immediate operand.
@@ -168,7 +168,7 @@
 /// \returns A 64-bit integer vector containing the concatenated right-shifted
 ///value.
 #define _mm_alignr_pi8(a, b, n) \
-  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
+  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 ///128-bit vectors of [8 x i16].
Index: clang/lib/Headers/smmintrin.h
===
--- clang/lib/Headers/smmintrin.h
+++ clang/lib/Headers/smmintrin.h
@@ -231,7 +231,7 @@
 ///  11: Truncated
 /// \returns A 128-bit vector of [4 x float] containing the rounded values.
 #define _mm_round_ps(X, M) \
-  (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
+  ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)))
 
 /// Copies three upper elements of the first 128-bit vector operand to
 ///the corresponding three upper elements of the 128-bit result vector of
@@ -272,8 +272,8 @@
 /// \returns A 128-bit vector of [4 x float] containing the copied and rounded
 ///values.
 #define _mm_round_ss(X, Y, M) \
-  (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M))
+  ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+  (__v4sf)(__m128)(Y), (M)))
 
 /// Rounds each element of the 128-bit vector of [2 x double] to an
 ///integer value according to the rounding control