[clang] 3cfae43 - [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (#173802)

via cfe-commits Mon, 12 Jan 2026 13:05:45 -0800

Author: DannyDaoBoYang
Date: 2026-01-12T13:05:35-08:00
New Revision: 3cfae43d07a478cbf75454419842824d62aedd40


URL: 
https://github.com/llvm/llvm-project/commit/3cfae43d07a478cbf75454419842824d62aedd40
DIFF: 
https://github.com/llvm/llvm-project/commit/3cfae43d07a478cbf75454419842824d62aedd40.diff

LOG: [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins 
(#173802)

-Add CIR support for pmovqd512_mask.
-In addition, I noticed that pmovwb512_mask uses identical
truncation-and-selection logic in
[X86.cpp](https://github.com/llvm/llvm-project/blob/main/clang/lib/CodeGen/TargetBuiltins/X86.cpp#L1638),
so I added support for pmovwb512_mask using the same logic.
-Added a new LIT test case in
clang/test/CIR/CodeGen/pmovqd-mask-builtins.c to verify correct CIR
generation for both builtins.
-Added a type cast fix in CirGenCleanup.cpp where std::max encountered a
type mismatch between size_t and unsigned long on Win64. This caused a
compile error for me when building on Windows. Edit: this fix has
been moved to https://github.com/llvm/llvm-project/pull/174519

This is my first pull request to this repo. If you notice anything wrong,
please comment; I look forward to any corrections.

Addresses #167765

Added: 
    

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
    clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index eeecc9cdaa741..f88e57aca6a08 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1315,7 +1315,12 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
                                     mask);
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
-  case X86::BI__builtin_ia32_pmovwb512_mask:
+  case X86::BI__builtin_ia32_pmovwb512_mask: {
+    mlir::Value Res =
+        builder.createIntCast(ops[0], cast<cir::VectorType>(ops[1].getType()));
+    return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res,
+                         ops[1]);
+  }
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index b4a3fb5ac4c61..d52cd4d63245b 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -800,7 +800,70 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) {
   // OGCG-LABEL: {{.*}}movepi16_mask{{.*}}(
   // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
   // OGCG: bitcast <32 x i1> [[CMP]] to i32
-  return _mm512_movepi16_mask(__A);
+  return _mm512_movepi16_mask(__A); 
+}
+
+__m256i test_mm512_cvtepi16_epi8(__m512i __A) {
+  // CIR-LABEL: test_mm512_cvtepi16_epi8
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> 
-> !cir.vector<32 x !s8i>
+  // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<32 x !s8i> -> 
!cir.vector<4 x !s64i>
+  // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, 
!cir.ptr<!cir.vector<4 x !s64i>>
+  // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x 
!s64i>>, !cir.vector<4 x !s64i>
+  // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+  
+  // LLVM-LABEL: test_mm512_cvtepi16_epi8
+  // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+  // LLVM: bitcast <32 x i8> %[[TRUNC]] to <4 x i64>
+  
+  // OGCG-LABEL: test_mm512_cvtepi16_epi8
+  // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+  // OGCG: bitcast <32 x i8> %[[TRUNC]] to <4 x i64>
+  return _mm512_cvtepi16_epi8(__A);
+}
+
+__m256i test_mm512_mask_cvtepi16_epi8(__m256i __O, __mmask32 __M, __m512i __A) 
{
+  // CIR-LABEL: test_mm512_mask_cvtepi16_epi8
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> 
-> !cir.vector<32 x !s8i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 
x !cir.int<s, 1>>
+  // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : 
!cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+  // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<32 x !s8i> 
-> !cir.vector<4 x !s64i>
+  // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, 
!cir.ptr<!cir.vector<4 x !s64i>>
+  // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x 
!s64i>>, !cir.vector<4 x !s64i>
+  // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+  
+  // LLVM-LABEL: test_mm512_mask_cvtepi16_epi8
+  // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+  // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], 
<32 x i8> %{{.*}}
+  // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+  
+  // OGCG-LABEL: test_mm512_mask_cvtepi16_epi8
+  // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+  // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], 
<32 x i8> %{{.*}}
+  // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+  return _mm512_mask_cvtepi16_epi8(__O, __M, __A);
+}
+
+__m256i test_mm512_maskz_cvtepi16_epi8(__mmask32 __M, __m512i __A) {
+  // CIR-LABEL: test_mm512_maskz_cvtepi16_epi8
+  // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u32i, !cir.vector<8 x !s64i>) -> 
!cir.vector<4 x !s64i>
+  // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, 
!cir.ptr<!cir.vector<4 x !s64i>>
+  // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x 
!s64i>>, !cir.vector<4 x !s64i>
+  // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+  
+  // LLVM-LABEL: test_mm512_maskz_cvtepi16_epi8
+  // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+  // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], 
<32 x i8> {{.*}}
+  // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+  
+  // OGCG-LABEL: test_mm512_maskz_cvtepi16_epi8
+  // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+  // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], 
<32 x i8> {{.*}}
+  // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+  return _mm512_maskz_cvtepi16_epi8(__M, __A);
 }
 
 __m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B) {

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index 47d101e79ec15..19f80d9eb6030 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -1053,3 +1053,66 @@ int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
   // OGCG: zext i1 %[[CMP]] to i32
   return _mm512_kortestz(__A,__B);
 }
+
+__m256i test_mm512_cvtepi64_epi32(__m512i __A) {
+  // CIR-LABEL: test_mm512_cvtepi64_epi32
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
+  // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<8 x !s32i> -> 
!cir.vector<4 x !s64i>
+  // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, 
!cir.ptr<!cir.vector<4 x !s64i>>
+  // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x 
!s64i>>, !cir.vector<4 x !s64i>
+  // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+  
+  // LLVM-LABEL: test_mm512_cvtepi64_epi32
+  // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // LLVM: bitcast <8 x i32> %[[TRUNC]] to <4 x i64>
+  
+  // OGCG-LABEL: test_mm512_cvtepi64_epi32
+  // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // OGCG: bitcast <8 x i32> %[[TRUNC]] to <4 x i64>
+  return _mm512_cvtepi64_epi32(__A);
+}
+
+__m256i test_mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) 
{
+  // CIR-LABEL: test_mm512_mask_cvtepi64_epi32
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : 
!cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+  // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<8 x !s32i> 
-> !cir.vector<4 x !s64i>
+  // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, 
!cir.ptr<!cir.vector<4 x !s64i>>
+  // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x 
!s64i>>, !cir.vector<4 x !s64i>
+  // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+  
+  // LLVM-LABEL: test_mm512_mask_cvtepi64_epi32
+  // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], 
<8 x i32> %{{.*}}
+  // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+  
+  // OGCG-LABEL: test_mm512_mask_cvtepi64_epi32
+  // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], 
<8 x i32> %{{.*}}
+  // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+  return _mm512_mask_cvtepi64_epi32(__O, __M, __A);
+}
+
+__m256i test_mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A) {
+  // CIR-LABEL: test_mm512_maskz_cvtepi64_epi32
+  // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u8i, !cir.vector<8 x !s64i>) -> 
!cir.vector<4 x !s64i>
+  // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, 
!cir.ptr<!cir.vector<4 x !s64i>>
+  // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x 
!s64i>>, !cir.vector<4 x !s64i>
+  // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+  
+  // LLVM-LABEL: test_mm512_maskz_cvtepi64_epi32
+  // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], 
<8 x i32> {{.*}}
+  // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+  
+  // OGCG-LABEL: test_mm512_maskz_cvtepi64_epi32
+  // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], 
<8 x i32> {{.*}}
+  // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+  return _mm512_maskz_cvtepi64_epi32(__M, __A);
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 3cfae43 - [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (#173802)

Reply via email to