Author: DannyDaoBoYang Date: 2026-01-12T13:05:35-08:00 New Revision: 3cfae43d07a478cbf75454419842824d62aedd40
URL: https://github.com/llvm/llvm-project/commit/3cfae43d07a478cbf75454419842824d62aedd40 DIFF: https://github.com/llvm/llvm-project/commit/3cfae43d07a478cbf75454419842824d62aedd40.diff LOG: [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (#173802) - Add CIR support for pmovqd512_mask. - In addition, I noticed that pmovwb512_mask uses identical truncation-and-selection logic in [X86.cpp](https://github.com/llvm/llvm-project/blob/main/clang/lib/CodeGen/TargetBuiltins/X86.cpp#L1638), so I added support for pmovwb512_mask using the same logic. - Added a new LIT test case in clang/test/CIR/CodeGen/pmovqd-mask-builtins.c to verify correct CIR generation for both builtins. - Added a type cast fix in CIRGenCleanup.cpp where std::max encountered a type mismatch between size_t and unsigned long on Win64. This caused a compile error for me when building on Windows. Edit: This fix has been moved to https://github.com/llvm/llvm-project/pull/174519. This is my first pull request to this repo. If you notice anything wrong, please comment; I'm looking forward to corrections.
Addresses #167765 Added: Modified: clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index eeecc9cdaa741..f88e57aca6a08 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1315,7 +1315,12 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { mask); } case X86::BI__builtin_ia32_pmovqd512_mask: - case X86::BI__builtin_ia32_pmovwb512_mask: + case X86::BI__builtin_ia32_pmovwb512_mask: { + mlir::Value Res = + builder.createIntCast(ops[0], cast<cir::VectorType>(ops[1].getType())); + return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, + ops[1]); + } case X86::BI__builtin_ia32_pblendw128: case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_blendps: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c index b4a3fb5ac4c61..d52cd4d63245b 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c @@ -800,7 +800,70 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) { // OGCG-LABEL: {{.*}}movepi16_mask{{.*}}( // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer // OGCG: bitcast <32 x i1> [[CMP]] to i32 - return _mm512_movepi16_mask(__A); + return _mm512_movepi16_mask(__A); +} + +__m256i test_mm512_cvtepi16_epi8(__m512i __A) { + // CIR-LABEL: test_mm512_cvtepi16_epi8 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<32 x !s8i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : 
!cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_cvtepi16_epi8 + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // LLVM: bitcast <32 x i8> %[[TRUNC]] to <4 x i64> + + // OGCG-LABEL: test_mm512_cvtepi16_epi8 + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // OGCG: bitcast <32 x i8> %[[TRUNC]] to <4 x i64> + return _mm512_cvtepi16_epi8(__A); +} + +__m256i test_mm512_mask_cvtepi16_epi8(__m256i __O, __mmask32 __M, __m512i __A) { + // CIR-LABEL: test_mm512_mask_cvtepi16_epi8 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>> + // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<32 x !s8i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_mask_cvtepi16_epi8 + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %{{.*}} + // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_mask_cvtepi16_epi8 + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> 
%[[TRUNC]], <32 x i8> %{{.*}} + // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64> + return _mm512_mask_cvtepi16_epi8(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtepi16_epi8(__mmask32 __M, __m512i __A) { + // CIR-LABEL: test_mm512_maskz_cvtepi16_epi8 + // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u32i, !cir.vector<8 x !s64i>) -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_maskz_cvtepi16_epi8 + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> {{.*}} + // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_maskz_cvtepi16_epi8 + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> {{.*}} + // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64> + return _mm512_maskz_cvtepi16_epi8(__M, __A); } __m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B) { diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c index 47d101e79ec15..19f80d9eb6030 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c @@ -1053,3 +1053,66 @@ int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) { // OGCG: zext i1 %[[CMP]] to i32 return _mm512_kortestz(__A,__B); } + +__m256i test_mm512_cvtepi64_epi32(__m512i __A) { + // CIR-LABEL: test_mm512_cvtepi64_epi32 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_cvtepi64_epi32 + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: bitcast <8 x i32> %[[TRUNC]] to <4 x i64> + + // OGCG-LABEL: test_mm512_cvtepi64_epi32 + // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: bitcast <8 x i32> %[[TRUNC]] to <4 x i64> + return _mm512_cvtepi64_epi32(__A); +} + +__m256i test_mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { + // CIR-LABEL: test_mm512_mask_cvtepi64_epi32 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> + // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_mask_cvtepi64_epi32 + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %{{.*}} + // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_mask_cvtepi64_epi32 + // OGCG: %[[TRUNC:.*]] = 
trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %{{.*}} + // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64> + return _mm512_mask_cvtepi64_epi32(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A) { + // CIR-LABEL: test_mm512_maskz_cvtepi64_epi32 + // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u8i, !cir.vector<8 x !s64i>) -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_maskz_cvtepi64_epi32 + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> {{.*}} + // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_maskz_cvtepi64_epi32 + // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> {{.*}} + // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64> + return _mm512_maskz_cvtepi64_epi32(__M, __A); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
