https://github.com/MarwanTarik updated https://github.com/llvm/llvm-project/pull/171694
>From 82529b8bfd35c9e8059b49e2f17b3c837232cf09 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Wed, 10 Dec 2025 22:21:55 +0200
Subject: [PATCH 1/6] Upstream CIR Codegen for convert-to-mask X86 builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp   | 114 +++++++++++++++++++
 clang/test/CodeGen/X86/avx512vlbw-builtins.c |  12 ++
 2 files changed, 126 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index fb17e31bf36d6..bba7249666aaf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -231,6 +231,113 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
   return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
                              mlir::ValueRange{lhsVec, rhsVec});
 }
+static mlir::Value emitX86MaskedCompareResult(CIRGenFunction &cgf,
+                                              mlir::Value cmp, unsigned numElts,
+                                              mlir::Value maskIn,
+                                              mlir::Location loc) {
+  if (maskIn) {
+    llvm_unreachable("NYI");
+  }
+  if (numElts < 8) {
+    int64_t indices[8];
+    for (unsigned i = 0; i != numElts; ++i)
+      indices[i] = i;
+    for (unsigned i = numElts; i != 8; ++i)
+      indices[i] = i % numElts + numElts;
+
+    // This should shuffle between cmp (first vector) and null (second vector)
+    mlir::Value nullVec = cgf.getBuilder().getNullValue(cmp.getType(), loc);
+    cmp = cgf.getBuilder().createVecShuffle(loc, cmp, nullVec, indices);
+  }
+  return cgf.getBuilder().createBitcast(
+      cmp, cgf.getBuilder().getUIntNTy(std::max(numElts, 8U)));
+}
+
+static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
+                                        bool isSigned,
+                                        ArrayRef<mlir::Value> ops,
+                                        mlir::Location loc) {
+  assert((ops.size() == 2 || ops.size() == 4) &&
+         "Unexpected number of arguments");
+  unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+  mlir::Value cmp;
+
+  if (cc == 3) {
+    llvm_unreachable("NYI");
+  } else if (cc == 7) {
+    llvm_unreachable("NYI");
+  } else {
+    cir::CmpOpKind pred;
+    switch (cc) {
+    default:
+      llvm_unreachable("Unknown condition code");
+    case 0:
+      pred = cir::CmpOpKind::eq;
+      break;
+    case 1:
+      pred = cir::CmpOpKind::lt;
+      break;
+    case 2:
+      pred = cir::CmpOpKind::le;
+      break;
+    case 4:
+      pred = cir::CmpOpKind::ne;
+      break;
+    case 5:
+      pred = cir::CmpOpKind::ge;
+      break;
+    case 6:
+      pred = cir::CmpOpKind::gt;
+      break;
+    }
+
+    auto resultTy = cgf.getBuilder().getType<cir::VectorType>(
+        cgf.getBuilder().getUIntNTy(1), numElts);
+    cmp = cir::VecCmpOp::create(cgf.getBuilder(), loc, resultTy, pred, ops[0],
+                                ops[1]);
+  }
+
+  mlir::Value maskIn;
+  if (ops.size() == 4)
+    maskIn = ops[3];
+
+  return emitX86MaskedCompareResult(cgf, cmp, numElts, maskIn, loc);
+}
+
+static mlir::Value emitX86ConvertToMask(CIRGenFunction &cgf, mlir::Value in,
+                                        mlir::Location loc) {
+  cir::ConstantOp zero = cgf.getBuilder().getNullValue(in.getType(), loc);
+  return emitX86MaskedCompare(cgf, 1, true, {in, zero}, loc);
+}
+
+// Convert the mask from an integer type to a vector of i1.
+static mlir::Value getMaskVecValue(CIRGenFunction &cgf, mlir::Value mask,
+                                   unsigned numElts, mlir::Location loc) {
+  cir::VectorType maskTy =
+      cir::VectorType::get(cgf.getBuilder().getSIntNTy(1),
+                           cast<cir::IntType>(mask.getType()).getWidth());
+
+  mlir::Value maskVec = cgf.getBuilder().createBitcast(mask, maskTy);
+
+  // If we have less than 8 elements, then the starting mask was an i8 and
+  // we need to extract down to the right number of elements.
+  if (numElts < 8) {
+    llvm::SmallVector<int64_t, 4> indices;
+    for (unsigned i = 0; i != numElts; ++i)
+      indices.push_back(i);
+    maskVec = cgf.getBuilder().createVecShuffle(loc, maskVec, maskVec, indices);
+  }
+
+  return maskVec;
+}
+
+static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
+                                   mlir::Type dstTy, mlir::Location loc) {
+  unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
+  mlir::Value mask = getMaskVecValue(cgf, op, numberOfElements, loc);
+
+  return cgf.getBuilder().createCast(loc, cir::CastKind::integral, mask, dstTy);
+}
 
 static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
                                  mlir::Value vec, mlir::Value value,
@@ -558,6 +665,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_storesh128_mask:
   case X86::BI__builtin_ia32_storess128_mask:
   case X86::BI__builtin_ia32_storesd128_mask:
+
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -570,6 +678,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cvtmask2q128:
   case X86::BI__builtin_ia32_cvtmask2q256:
   case X86::BI__builtin_ia32_cvtmask2q512:
+    return emitX86SExtMask(*this, ops[0], convertType(expr->getType()),
+                           getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtb2mask128:
   case X86::BI__builtin_ia32_cvtb2mask256:
   case X86::BI__builtin_ia32_cvtb2mask512:
@@ -582,18 +692,22 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cvtq2mask128:
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
+    return emitX86ConvertToMask(*this, ops[0], getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+    llvm_unreachable("vcvtw2ph256_round_mask NYI");
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+    llvm_unreachable("vcvtuw2ph256_round_mask NYI");
+
   case X86::BI__builtin_ia32_vfmaddsh3_mask:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index f6f27d9c3da3d..a088efa6784db 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3226,6 +3226,18 @@ __m256i test_mm256_movm_epi8(__mmask32 __A) {
   return _mm256_movm_epi8(__A);
 }
 
+__m512i test_mm512_movm_epi8(__mmask64 __A) {
+  // CIR-LABEL: _mm512_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u64i -> !cir.vector<!cir.int<s, 1> x 64>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 64> -> !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_movm_epi8
+  // LLVM: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+  return _mm512_movm_epi8(__A);
+}
+
+
 __m128i test_mm_movm_epi16(__mmask8 __A) {
   // CHECK-LABEL: test_mm_movm_epi16
   // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>

>From 392f69e188b558d2fee56cd3d50acb1f791599fe Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Fri, 12 Dec 2025 07:03:08 +0200
Subject: [PATCH 2/6] Refactor x86 masked compare functions to use
 CIRGenBuilderTy and improve error handling

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 99 +++++++++++++---------
 1 file changed, 60 insertions(+), 39 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index c284286f43cc2..5ecc79e8baf00 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,12 +14,18 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/Sequence.h"
+#include <string>
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace clang;
@@ -278,30 +284,36 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
   return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
                              mlir::ValueRange{lhsVec, rhsVec});
 }
-static mlir::Value emitX86MaskedCompareResult(CIRGenFunction &cgf,
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86MaskedCompareResult(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
                                               mlir::Value cmp, unsigned numElts,
                                               mlir::Value maskIn,
                                               mlir::Location loc) {
   if (maskIn) {
-    llvm_unreachable("NYI");
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
   }
   if (numElts < 8) {
-    int64_t indices[8];
+    llvm::SmallVector<mlir::Attribute> indices;
+    mlir::Type i64Ty = builder.getSInt64Ty();
+
     for (unsigned i = 0; i != numElts; ++i)
-      indices[i] = i;
+      indices.push_back(cir::IntAttr::get(i64Ty, i));
     for (unsigned i = numElts; i != 8; ++i)
-      indices[i] = i % numElts + numElts;
+      indices.push_back(cir::IntAttr::get(i64Ty, i % numElts + numElts));
 
     // This should shuffle between cmp (first vector) and null (second vector)
-    mlir::Value nullVec = cgf.getBuilder().getNullValue(cmp.getType(), loc);
-    cmp = cgf.getBuilder().createVecShuffle(loc, cmp, nullVec, indices);
+    mlir::Value nullVec = builder.getNullValue(cmp.getType(), loc);
+    cmp = builder.createVecShuffle(loc, cmp, nullVec, indices);
   }
-  return cgf.getBuilder().createBitcast(
-      cmp, cgf.getBuilder().getUIntNTy(std::max(numElts, 8U)));
+  return builder.createBitcast(
+      cmp, builder.getUIntNTy(std::max(numElts, 8U)));
 }
 
-static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
-                                        bool isSigned,
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+                                                       unsigned cc, bool isSigned,
                                         ArrayRef<mlir::Value> ops,
                                         mlir::Location loc) {
   assert((ops.size() == 2 || ops.size() == 4) &&
@@ -310,9 +322,9 @@ static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
   mlir::Value cmp;
 
   if (cc == 3) {
-    llvm_unreachable("NYI");
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
   } else if (cc == 7) {
-    llvm_unreachable("NYI");
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare cc == 7");
   } else {
     cir::CmpOpKind pred;
     switch (cc) {
@@ -338,9 +350,8 @@ static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
       break;
     }
 
-    auto resultTy = cgf.getBuilder().getType<cir::VectorType>(
-        cgf.getBuilder().getUIntNTy(1), numElts);
-    cmp = cir::VecCmpOp::create(cgf.getBuilder(), loc, resultTy, pred, ops[0],
+    auto resultTy = cir::VectorType::get(builder.getSIntNTy(1), numElts);
+    cmp = cir::VecCmpOp::create(builder, loc, resultTy, pred, ops[0],
                                 ops[1]);
   }
 
@@ -348,42 +359,51 @@ static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
   if (ops.size() == 4)
     maskIn = ops[3];
 
-  return emitX86MaskedCompareResult(cgf, cmp, numElts, maskIn, loc);
+  return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc);
 }
 
-static mlir::Value emitX86ConvertToMask(CIRGenFunction &cgf, mlir::Value in,
-                                        mlir::Location loc) {
-  cir::ConstantOp zero = cgf.getBuilder().getNullValue(in.getType(), loc);
-  return emitX86MaskedCompare(cgf, 1, true, {in, zero}, loc);
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86ConvertToMask(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+                                                       mlir::Value in, mlir::Location loc) {
+  cir::ConstantOp zero = builder.getNullValue(in.getType(), loc);
+  return emitX86MaskedCompare(cgf, builder, 1, true, {in, zero}, loc);
 }
 
 // Convert the mask from an integer type to a vector of i1.
-static mlir::Value getMaskVecValue(CIRGenFunction &cgf, mlir::Value mask,
+static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, CIRGenFunction &cgf,
+                                   mlir::Value mask,
                                    unsigned numElts, mlir::Location loc) {
   cir::VectorType maskTy =
-      cir::VectorType::get(cgf.getBuilder().getSIntNTy(1),
+      cir::VectorType::get(builder.getSIntNTy(1),
                            cast<cir::IntType>(mask.getType()).getWidth());
 
-  mlir::Value maskVec = cgf.getBuilder().createBitcast(mask, maskTy);
+  mlir::Value maskVec = builder.createBitcast(mask, maskTy);
 
   // If we have less than 8 elements, then the starting mask was an i8 and
   // we need to extract down to the right number of elements.
   if (numElts < 8) {
-    llvm::SmallVector<int64_t, 4> indices;
-    for (unsigned i = 0; i != numElts; ++i)
-      indices.push_back(i);
-    maskVec = cgf.getBuilder().createVecShuffle(loc, maskVec, maskVec, indices);
+    llvm::SmallVector<mlir::Attribute> indices;
+    mlir::Type i64Ty = builder.getSInt64Ty();
+
+    for (auto i : llvm::seq<unsigned>(0, numElts))
+      indices.push_back(cir::IntAttr::get(i64Ty, i));
+
+    maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
   }
 
   return maskVec;
 }
 
-static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
-                                   mlir::Type dstTy, mlir::Location loc) {
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86SExtMask(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+                                                  mlir::Value op,
+                                                  mlir::Type dstTy, mlir::Location loc) {
   unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
-  mlir::Value mask = getMaskVecValue(cgf, op, numberOfElements, loc);
+  mlir::Value mask = getMaskVecValue(builder, cgf, op, numberOfElements, loc);
 
-  return cgf.getBuilder().createCast(loc, cir::CastKind::integral, mask, dstTy);
+  return builder.createCast(loc, cir::CastKind::integral, mask, dstTy);
 }
 
 static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
@@ -781,7 +801,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_storesh128_mask:
   case X86::BI__builtin_ia32_storess128_mask:
   case X86::BI__builtin_ia32_storesd128_mask:
-
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented x86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -794,8 +816,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtmask2q128:
   case X86::BI__builtin_ia32_cvtmask2q256:
   case X86::BI__builtin_ia32_cvtmask2q512:
-    return emitX86SExtMask(*this, ops[0], convertType(expr->getType()),
-                           getLoc(expr->getExprLoc()));
+    return emitX86SExtMask(*this, this->getBuilder(),
+                           ops[0], convertType(expr->getType()),
+                           getLoc(expr->getExprLoc())).value();
   case X86::BI__builtin_ia32_cvtb2mask128:
   case X86::BI__builtin_ia32_cvtb2mask256:
   case X86::BI__builtin_ia32_cvtb2mask512:
@@ -808,22 +831,20 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtq2mask128:
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
-    return emitX86ConvertToMask(*this, ops[0], getLoc(expr->getExprLoc()));
+    return emitX86ConvertToMask(*this, this->getBuilder(),
+                                ops[0], getLoc(expr->getExprLoc())).value();
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
-    llvm_unreachable("vcvtw2ph256_round_mask NYI");
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
-    llvm_unreachable("vcvtuw2ph256_round_mask NYI");
-
   case X86::BI__builtin_ia32_vfmaddsh3_mask:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:

>From 43250a6bde76f3b927c0766b8bc26ea30b56f8f5 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Fri, 12 Dec 2025 09:33:04 +0200
Subject: [PATCH 3/6] [CIR] [X86] Fix x86 builtin expression handling by
 returning mlir::Value in error cases

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 5ecc79e8baf00..d066c4c18473d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -804,6 +804,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
     cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented x86 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -818,7 +819,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtmask2q512:
     return emitX86SExtMask(*this, this->getBuilder(),
                            ops[0], convertType(expr->getType()),
-                           getLoc(expr->getExprLoc())).value();
+                           getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtb2mask128:
   case X86::BI__builtin_ia32_cvtb2mask256:
   case X86::BI__builtin_ia32_cvtb2mask512:
@@ -832,7 +833,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
     return emitX86ConvertToMask(*this, this->getBuilder(),
-                                ops[0], getLoc(expr->getExprLoc())).value();
+                                ops[0], getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:

>From d02fcae9772c8b958979c5e3f73cd5a492748071 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Fri, 12 Dec 2025 09:33:23 +0200
Subject: [PATCH 4/6] [CodeGen][X86] Add test for _mm512_movm_epi32 intrinsic
 with CIR and LLVM checks

---
 clang/test/CodeGen/X86/avx512vldq-builtins.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 92d8e1aa0879a..7fc34ca06fe58 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -951,6 +951,17 @@ __m256i test_mm256_movm_epi32(__mmask8 __A) {
   return _mm256_movm_epi32(__A);
 }
 
+__m512i test_mm512_movm_epi32(__mmask16 __A) {
+  // CIR-LABEL: _mm512_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 16> -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_movm_epi32
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+  return _mm512_movm_epi32(__A);
+}
+
 __m128i test_mm_movm_epi64(__mmask8 __A) {
   // CHECK-LABEL: test_mm_movm_epi64
   // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>

>From 3518174f1be12dab784cdd4a67cc61728f479e67 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Mon, 15 Dec 2025 20:24:22 +0200
Subject: [PATCH 5/6] Add tests for AVX512 mask and move intrinsics in CIR

---
 .../CodeGenBuiltins/X86/avx512bw-builtins.c   |  39 +++++++
 .../CodeGenBuiltins/X86/avx512dq-builtins.c   |  35 ++++++
 .../CodeGenBuiltins/X86/avx512vlbw-builtins.c | 104 +++++++++++++++++
 .../CodeGenBuiltins/X86/avx512vldq-builtins.c | 110 ++++++++++++++++++
 4 files changed, 288 insertions(+)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index 439c89e7953ab..7b46aea0faf21 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -768,3 +768,42 @@ unsigned char test_ktestz_mask64_u8(__mmask64 A, __mmask64 B) {
   return _ktestz_mask64_u8(A, B);
 }
 
+
+
+__m512i test_mm512_movm_epi16(__mmask32 __A) {
+  // CIR-LABEL: _mm512_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> !cir.vector<!cir.int<s, 1> x 32>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 32> -> !cir.vector<!s16i x 32>
+  // LLVM-LABEL: @test_mm512_movm_epi16
+  // LLVM: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i16>
+  return _mm512_movm_epi16(__A);
+}
+
+__mmask64 test_mm512_movepi8_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi8_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 64>, !cir.vector<!cir.int<u, 1> x 64>
+
+  // LLVM-LABEL: @test_mm512_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+
+  // In the unsigned case below, the canonicalizer proves the comparison is
+  // always false (no i8 unsigned value can be < 0) and folds it away.
+  // LLVM-UNSIGNED-CHAR: store i64 0, ptr %{{.*}}, align 8
+
+  // OGCG-LABEL: @test_mm512_movepi8_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+  return _mm512_movepi8_mask(__A);
+}
+
+__mmask32 test_mm512_movepi16_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi16_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 32>, !cir.vector<!cir.int<u, 1> x 32>
+
+  // LLVM-LABEL: @test_mm512_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi16_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+  return _mm512_movepi16_mask(__A);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
index 3475e186e0c8f..66349ba4939fa 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
@@ -13,6 +13,17 @@
 
 #include <immintrin.h>
 
+__m512i test_mm512_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm512_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 8> -> !cir.vector<!s64i x 8>
+  // LLVM-LABEL: @test_mm512_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
+  return _mm512_movm_epi64(__A);
+}
+
+
 __mmask8 test_kadd_mask8(__mmask8 A, __mmask8 B) {
   // CIR-LABEL: _kadd_mask8
   // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
@@ -323,3 +334,27 @@ unsigned char test_ktestz_mask16_u8(__mmask16 A, __mmask16 B) {
   // OGCG: trunc i32 %[[RES]] to i8
   return _ktestz_mask16_u8(A, B);
 }
+
+__mmask16 test_mm512_movepi32_mask(__m512i __A) {
+  // CIR-LABEL: _mm512_movepi32_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s32i x 16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm512_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi32_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+  return _mm512_movepi32_mask(__A);
+}
+
+__mmask8 test_mm512_movepi64_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 8>, !cir.vector<!cir.int<u, 1> x 8>
+
+  // LLVM-LABEL: @test_mm512_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+  return _mm512_movepi64_mask(__A);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
new file mode 100644
index 0000000000000..ed67a859230fb
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+
+
+#include <immintrin.h>
+
+__m128i test_mm_movm_epi8(__mmask16 __A) {
+  // CIR-LABEL: _mm_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 16> -> !cir.vector<{{!s8i|!u8i}} x 16>
+
+  // LLVM-LABEL: @test_mm_movm_epi8
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i8>
+  return _mm_movm_epi8(__A);
+}
+
+__m256i test_mm256_movm_epi8(__mmask32 __A) {
+  // CIR-LABEL: _mm256_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> !cir.vector<!cir.int<s, 1> x 32>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 32> -> !cir.vector<{{!s8i|!u8i}} x 32>
+
+  // LLVM-LABEL: @test_mm256_movm_epi8
+  // LLVM: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i8>
+  return _mm256_movm_epi8(__A);
+}
+
+__m512i test_mm512_movm_epi8(__mmask64 __A) {
+  // CIR-LABEL: _mm512_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u64i -> !cir.vector<!cir.int<s, 1> x 64>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 64> -> !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_movm_epi8
+  // LLVM: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+  return _mm512_movm_epi8(__A);
+}
+
+__m128i test_mm_movm_epi16(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 8> -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_mm_movm_epi16
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i16>
+  return _mm_movm_epi16(__A);
+}
+
+__m256i test_mm256_movm_epi16(__mmask16 __A) {
+  // CIR-LABEL: _mm256_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 16> -> !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: @test_mm256_movm_epi16
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i16>
+  return _mm256_movm_epi16(__A);
+}
+
+__mmask16 test_mm_movepi8_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi8_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+
+  // In the unsigned case below, the canonicalizer proves the comparison is
+  // always false (no i8 unsigned value can be < 0) and folds it away.
+  // LLVM-UNSIGNED-CHAR: store i16 0, ptr %{{.*}}, align 2
+
+  // OGCG-LABEL: @test_mm_movepi8_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+  return _mm_movepi8_mask(__A);
+}
+
+__mmask16 test_mm256_movepi16_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi16_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm256_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm256_movepi16_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+  return _mm256_movepi16_mask(__A);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
new file mode 100644
index 0000000000000..ef9e9d7f9fde7
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
@@ -0,0 +1,110 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+
+__m128i test_mm_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<s, 1> x 4>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 4> -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i32>
+  return _mm_movm_epi32(__A);
+}
+
+__m256i test_mm256_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 8> -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i32>
+  return _mm256_movm_epi32(__A);
+}
+
+__m512i test_mm512_movm_epi32(__mmask16 __A) {
+  // CIR-LABEL: _mm512_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 16> -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_movm_epi32
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+  return _mm512_movm_epi32(__A);
+}
+
+__m128i test_mm_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.int<s, 1> x 2>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 2> -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // LLVM: %{{.*}} = sext <2 x i1> %{{.*}} to <2 x i64>
+  return _mm_movm_epi64(__A);
+}
+
+__m256i test_mm256_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<s, 1> x 4>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 4> -> !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i64>
+  return _mm256_movm_epi64(__A);
+}
+
+__mmask8 test_mm256_movepi32_mask(__m256i __A) {
+  // LLVM-LABEL: @test_mm256_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm256_movepi32_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+  return _mm256_movepi32_mask(__A);
+}
+
+__mmask8 test_mm_movepi64_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 2>, !cir.vector<!cir.int<u, 1> x 2>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 1> x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!cir.int<u, 1> x 8> -> !u8i
+
+  // LLVM-LABEL: @test_mm_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+
+  // OGCG-LABEL: @test_mm_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  return _mm_movepi64_mask(__A);
+}
+
+__mmask8 test_mm256_movepi64_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 4>, !cir.vector<!cir.int<u, 1> x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 1> x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>
+
+  // LLVM-LABEL: @test_mm256_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+  // OGCG-LABEL: @test_mm256_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_movepi64_mask(__A);
+}
+

>From 9ec96625ca84ea7951725fc29e33ccf3b12c0b89 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Mon, 15 Dec 2025 20:24:41 +0200
Subject: [PATCH 6/6] [CIR] [X86] Improve error handling in
 emitX86MaskedCompareResult and emitX86MaskedCompare functions

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index d066c4c18473d..6522a3d842f4d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -292,6 +292,7 @@ static std::optional<mlir::Value> emitX86MaskedCompareResult(CIRGenFunction &cgf
                                               mlir::Location loc) {
   if (maskIn) {
     cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
+    return {};
   }
   if (numElts < 8) {
     llvm::SmallVector<mlir::Attribute> indices;
@@ -323,8 +324,10 @@ static std::optional<mlir::Value> emitX86MaskedCompare(CIRGenFunction &cgf, CIRG
 
   if (cc == 3) {
     cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
+    return {};
   } else if (cc == 7) {
     cgf.cgm.errorNYI(loc, "emitX86MaskedCompare cc == 7");
+    return {};
   } else {
     cir::CmpOpKind pred;
     switch (cc) {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
