https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/173143
>From 0dd977df6b2fda837c32044c199a13a32c232b6f Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <[email protected]> Date: Sat, 20 Dec 2025 08:25:59 +0000 Subject: [PATCH 1/7] Implement handling for convert-half builtins --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 55 ++++++++++++- .../X86/avx512vlbf16-builtins.c | 80 +++++++++++++++++++ 2 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 75bf25b20f1af..59d467da3a9fb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -362,6 +362,27 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, return builder.createMul(loc, lhs, rhs); } +static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, + mlir::Location loc, + mlir::Type dstTy, + SmallVectorImpl<mlir::Value> &ops) { + + mlir::Value src = ops[0]; + mlir::Value passthru = ops[1]; + + auto vecTy = mlir::cast<cir::VectorType>(src.getType()); + uint64_t numElems = vecTy.getSize(); + + mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElems); + + auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems); + mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy); + + mlir::Value res = builder.createFloatingCast(srcF16, dstTy); + + return emitX86Select(builder, loc, mask, res, passthru); +} + static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, llvm::SmallVector<mlir::Value> ops, bool isSigned) { @@ -1662,12 +1683,40 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_cmpnltsd: case X86::BI__builtin_ia32_cmpnlesd: case X86::BI__builtin_ia32_cmpordsd: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; case X86::BI__builtin_ia32_vcvtph2ps_mask: case X86::BI__builtin_ia32_vcvtph2ps256_mask: - case X86::BI__builtin_ia32_vcvtph2ps512_mask: - case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: + case X86::BI__builtin_ia32_vcvtph2ps512_mask: { + mlir::Location loc = getLoc(expr->getExprLoc()); + return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()), + ops); + } + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { + mlir::Location loc = getLoc(expr->getExprLoc()); + mlir::Value intrinsicMask = getMaskVecValue(builder, loc, ops[2], 4); + return emitIntrinsicCallOp(builder, loc, + "x86.avx512bf16.mask.cvtneps2bf16.128", + convertType(expr->getType()), + mlir::ValueRange{ops[0], ops[1], intrinsicMask}); + } case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: - case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned numElts = cast<cir::VectorType>(ops[1].getType()).getSize(); + mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts); + StringRef intrinsicName; + if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask) + intrinsicName = "x86.avx512bf16.cvtneps2bf16.256"; + else + intrinsicName = "x86.avx512bf16.cvtneps2bf16.512"; + mlir::Value intrinsicResult = + emitIntrinsicCallOp(builder, loc, intrinsicName, ops[1].getType(), + mlir::ValueRange{ops[0]}); + return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]); + } case X86::BI__cpuid: case X86::BI__cpuidex: case X86::BI__emul: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c new file mode 100644 index 0000000000000..ccfc0d4a6a813 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bf16 -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s + +#include <immintrin.h> + +__m256bh test_mm512_mask_cvtneps_pbh(__m256bh src, __mmask16 k, __m512 a) { + // CIR-LABEL: @test_mm512_mask_cvtneps_pbh + // CIR: cir.call @_mm512_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : (!cir.vector<16 x !cir.bf16>, !u16i, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.bf16> + + // LLVM-LABEL: @test_mm512_mask_cvtneps_pbh + // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512 + + // OGCG-LABEL: @test_mm512_mask_cvtneps_pbh + // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512 + return _mm512_mask_cvtneps_pbh(src, k, a); +} + +__m256bh test_mm512_maskz_cvtneps_pbh(__mmask16 k, __m512 a) { + // CIR-LABEL: @test_mm512_maskz_cvtneps_pbh + // CIR: cir.call @_mm512_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u16i, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.bf16> + + // LLVM-LABEL: @test_mm512_maskz_cvtneps_pbh + // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> {{.+}}) + + // OGCG-LABEL: @test_mm512_maskz_cvtneps_pbh + // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> {{.+}}) + return _mm512_maskz_cvtneps_pbh(k, a); +} + +__m128bh test_mm256_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m256 a) { + // CIR-LABEL: test_mm256_mask_cvtneps_pbh + // CIR: cir.call @_mm256_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : (!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.bf16> + + // LLVM-LABEL: test_mm256_mask_cvtneps_pbh + // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}}) + + // OGCG-LABEL: test_mm256_mask_cvtneps_pbh + // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}}) + return _mm256_mask_cvtneps_pbh(src, k, a); +} + +__m128bh test_mm256_maskz_cvtneps_pbh(__mmask8 k, __m256 a) { + // CIR-LABEL: test_mm256_maskz_cvtneps_pbh + // CIR: cir.call @_mm256_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.bf16> + + // LLVM-LABEL: test_mm256_maskz_cvtneps_pbh + // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}}) + + // OGCG-LABEL: test_mm256_maskz_cvtneps_pbh + // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}}) + return _mm256_maskz_cvtneps_pbh(k, a); +} + +__m128bh test_mm_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m128 a) { + // CIR-LABEL: test_mm_mask_cvtneps_pbh + // CIR: cir.call @_mm_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : (!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<4 x !cir.float>) -> !cir.vector<8 x !cir.bf1{{.+}} + + // LLVM-LABEL: test_mm_mask_cvtneps_pbh + // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i) + + // OGCG-LABEL: test_mm_mask_cvtneps_pbh + // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i) + return _mm_mask_cvtneps_pbh(src, k, a); +} + +__m128bh test_mm_maskz_cvtneps_pbh(__mmask8 k, __m128 a) { + // CIR-LABEL: test_mm_maskz_cvtneps_pbh + // CIR: cir.call @_mm_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, !cir.vector<4 x !cir.float>) -> !cir.vector<8 x !cir.bf16> + + // LLVM-LABEL: test_mm_maskz_cvtneps_pbh + // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i) + + // OGCG-LABEL: test_mm_maskz_cvtneps_pbh + // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i) + return _mm_maskz_cvtneps_pbh(k, a); +} >From ed0155382bf68c99fa3a7b158407da3717a73741 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <[email protected]> Date: Sat, 20 Dec 2025 12:21:53 +0000 Subject: [PATCH 2/7] Update CIRGenBuiltinX86.cpp --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 45 ++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 59d467da3a9fb..f27b68ca4a437 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -364,23 +364,46 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, mlir::Location loc, - mlir::Type dstTy, - SmallVectorImpl<mlir::Value> &ops) { + SmallVectorImpl<mlir::Value> &ops, + mlir::Type DstTy) { + assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) && + "Unknown cvtph2ps intrinsic"); + + // If the SAE intrinsic doesn't use default rounding then we can't upgrade. + if (ops.size() == 4) { + cir::ConstantOp constOp = ops[3].getDefiningOp<cir::ConstantOp>(); + if (constOp && + mlir::cast<mlir::IntegerAttr>(constOp.getValue()).getInt() != 4) { + return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512", + DstTy, ops); + } + } - mlir::Value src = ops[0]; - mlir::Value passthru = ops[1]; + uint64_t NumDstElts = mlir::cast<cir::VectorType>(DstTy).getSize(); + mlir::Value Src = ops[0]; - auto vecTy = mlir::cast<cir::VectorType>(src.getType()); - uint64_t numElems = vecTy.getSize(); + // Extract the subvector + if (NumDstElts != mlir::cast<cir::VectorType>(Src.getType()).getSize()) { + assert(NumDstElts == 4 && "Unexpected vector size"); - mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElems); + SmallVector<int32_t, 4> indices = {0, 1, 2, 3}; + Src = builder.createShuffle(loc, Src, Src, indices); + } - auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems); - mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy); + // Bitcast from vXi16 to vXf16. + cir::VectorType HalfTy = + cir::VectorType::get(builder.getF16Type(), NumDstElts); + Src = builder.createBitcast(loc, Src, HalfTy); - mlir::Value res = builder.createFloatingCast(srcF16, dstTy); + // Perform the fp-extension. + mlir::Value Res = builder.createFloatingCast(Src, DstTy); + + if (ops.size() >= 3) { + mlir::Value MaskVec = getMaskVecValue(builder, loc, ops[2], NumDstElts); + Res = emitX86Select(builder, loc, MaskVec, Res, ops[1]); + } - return emitX86Select(builder, loc, mask, res, passthru); + return Res; } static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, >From 50e380f9aef07dc225e37147fa80da57c3839e56 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <[email protected]> Date: Sat, 20 Dec 2025 19:16:13 +0000 Subject: [PATCH 3/7] Update CIRGenBuiltinX86.cpp --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 78 ++++------------------ 1 file changed, 13 insertions(+), 65 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index f27b68ca4a437..7862119d659f8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -19,8 +19,9 @@ #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" -#include "clang/CIR/MissingFeatures.h" + #include "llvm/Support/ErrorHandling.h" +#include <cstdint> using namespace clang; using namespace clang::CIRGen; @@ -362,50 +363,6 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, return builder.createMul(loc, lhs, rhs); } -static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, - mlir::Location loc, - SmallVectorImpl<mlir::Value> &ops, - mlir::Type DstTy) { - assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) && - "Unknown cvtph2ps intrinsic"); - - // If the SAE intrinsic doesn't use default rounding then we can't upgrade. - if (ops.size() == 4) { - cir::ConstantOp constOp = ops[3].getDefiningOp<cir::ConstantOp>(); - if (constOp && - mlir::cast<mlir::IntegerAttr>(constOp.getValue()).getInt() != 4) { - return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512", - DstTy, ops); - } - } - - uint64_t NumDstElts = mlir::cast<cir::VectorType>(DstTy).getSize(); - mlir::Value Src = ops[0]; - - // Extract the subvector - if (NumDstElts != mlir::cast<cir::VectorType>(Src.getType()).getSize()) { - assert(NumDstElts == 4 && "Unexpected vector size"); - - SmallVector<int32_t, 4> indices = {0, 1, 2, 3}; - Src = builder.createShuffle(loc, Src, Src, indices); - } - - // Bitcast from vXi16 to vXf16. - cir::VectorType HalfTy = - cir::VectorType::get(builder.getF16Type(), NumDstElts); - Src = builder.createBitcast(loc, Src, HalfTy); - - // Perform the fp-extension. - mlir::Value Res = builder.createFloatingCast(Src, DstTy); - - if (ops.size() >= 3) { - mlir::Value MaskVec = getMaskVecValue(builder, loc, ops[2], NumDstElts); - Res = emitX86Select(builder, loc, MaskVec, Res, ops[1]); - } - - return Res; -} - static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, llvm::SmallVector<mlir::Value> ops, bool isSigned) { @@ -1706,38 +1663,29 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_cmpnltsd: case X86::BI__builtin_ia32_cmpnlesd: case X86::BI__builtin_ia32_cmpordsd: + case X86::BI__builtin_ia32_vcvtph2ps_mask: + case X86::BI__builtin_ia32_vcvtph2ps256_mask: + case X86::BI__builtin_ia32_vcvtph2ps512_mask: cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented X86 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); return mlir::Value{}; - case X86::BI__builtin_ia32_vcvtph2ps_mask: - case X86::BI__builtin_ia32_vcvtph2ps256_mask: - case X86::BI__builtin_ia32_vcvtph2ps512_mask: { - mlir::Location loc = getLoc(expr->getExprLoc()); - return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()), - ops); - } - case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { - mlir::Location loc = getLoc(expr->getExprLoc()); - mlir::Value intrinsicMask = getMaskVecValue(builder, loc, ops[2], 4); - return emitIntrinsicCallOp(builder, loc, - "x86.avx512bf16.mask.cvtneps2bf16.128", - convertType(expr->getType()), - mlir::ValueRange{ops[0], ops[1], intrinsicMask}); - } + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { mlir::Location loc = getLoc(expr->getExprLoc()); - unsigned numElts = cast<cir::VectorType>(ops[1].getType()).getSize(); + mlir::Type resTy = convertType(expr->getType()); + unsigned numElts = cast<cir::VectorType>(resTy).getSize(); mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts); StringRef intrinsicName; - if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask) + if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_128_mask) + intrinsicName = "x86.avx512bf16.cvtneps2bf16.128"; + else if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask) intrinsicName = "x86.avx512bf16.cvtneps2bf16.256"; else intrinsicName = "x86.avx512bf16.cvtneps2bf16.512"; - mlir::Value intrinsicResult = - emitIntrinsicCallOp(builder, loc, intrinsicName, ops[1].getType(), - mlir::ValueRange{ops[0]}); + mlir::Value intrinsicResult = emitIntrinsicCallOp( + builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]}); return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]); } case X86::BI__cpuid: >From b238c17d5d303e2910557b578503aa5ec5fcf2b3 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <[email protected]> Date: Sat, 20 Dec 2025 19:18:30 +0000 Subject: [PATCH 4/7] Update CIRGenBuiltinsX86.cpp --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 7862119d659f8..810b027fdb33d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -19,9 +19,8 @@ #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" - +#include "clang/CIR/MissingFeatures.h" #include "llvm/Support/ErrorHandling.h" -#include <cstdint> using namespace clang; using namespace clang::CIRGen; >From 9d3a326e95d0efe48bb3fdad1d2157106e0fa749 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <[email protected]> Date: Sat, 20 Dec 2025 20:01:12 +0000 Subject: [PATCH 5/7] add support for maskz builtins --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 45 ++++++++++++++++++---- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 810b027fdb33d..cea4ef1b91275 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1675,18 +1675,49 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, mlir::Location loc = getLoc(expr->getExprLoc()); mlir::Type resTy = convertType(expr->getType()); unsigned numElts = cast<cir::VectorType>(resTy).getSize(); - mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts); + bool isMaskZ = false; + if (auto *callExpr = llvm::dyn_cast<clang::CallExpr>(expr)) { + if (auto *vecInit = + llvm::dyn_cast<clang::InitListExpr>(callExpr->getArg(0))) { + isMaskZ = + vecInit->getNumInits() == numElts && + llvm::all_of(llvm::seq<unsigned>(0, numElts), [&](unsigned i) { + auto *init = vecInit->getInit(i); + if (auto *intLit = llvm::dyn_cast<clang::IntegerLiteral>(init)) + return intLit->getValue().isZero(); + if (auto *floatLit = llvm::dyn_cast<clang::FloatingLiteral>(init)) + return floatLit->getValue().isZero(); + return false; + }); + } + } StringRef intrinsicName; + StringRef cirFuncName; if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_128_mask) - intrinsicName = "x86.avx512bf16.cvtneps2bf16.128"; + intrinsicName = "x86.avx512bf16.cvtneps2bf16.128", + cirFuncName = isMaskZ ? "_mm_maskz_cvtneps_pbh" : "_mm_mask_cvtneps_pbh"; else if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask) - intrinsicName = "x86.avx512bf16.cvtneps2bf16.256"; + intrinsicName = "x86.avx512bf16.cvtneps2bf16.256", + cirFuncName = + isMaskZ ? "_mm256_maskz_cvtneps_pbh" : "_mm256_mask_cvtneps_pbh"; else - intrinsicName = "x86.avx512bf16.cvtneps2bf16.512"; - mlir::Value intrinsicResult = emitIntrinsicCallOp( - builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]}); - return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]); + intrinsicName = "x86.avx512bf16.cvtneps2bf16.512", + cirFuncName = + isMaskZ ? "_mm512_maskz_cvtneps_pbh" : "_mm512_mask_cvtneps_pbh"; + if (isMaskZ) + return builder + .createCallOp( + loc, mlir::SymbolRefAttr::get(builder.getContext(), cirFuncName), + resTy, {ops[2], ops[0]}) + .getResult(); + else { + mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts); + mlir::Value intrinsicResult = emitIntrinsicCallOp( + builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]}); + return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]); + } } + case X86::BI__cpuid: case X86::BI__cpuidex: case X86::BI__emul: >From d2118e69640c48c9b0211b4a7a2abb3d437e5fc5 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <[email protected]> Date: Sat, 20 Dec 2025 20:34:04 +0000 Subject: [PATCH 6/7] Update CIRGenBuiltinX86.cpp --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index cea4ef1b91275..f349b795f4e06 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -19,7 +19,6 @@ #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" -#include "clang/CIR/MissingFeatures.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; @@ -1674,6 +1673,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { mlir::Location loc = getLoc(expr->getExprLoc()); mlir::Type resTy = convertType(expr->getType()); + if (!isa<cir::VectorType>(resTy)) { + llvm::report_fatal_error( + "Expected cir::VectorType for AVX512 BF16 builtin lowering."); + } unsigned numElts = cast<cir::VectorType>(resTy).getSize(); bool isMaskZ = false; if (auto *callExpr = llvm::dyn_cast<clang::CallExpr>(expr)) { @@ -1710,12 +1713,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, loc, mlir::SymbolRefAttr::get(builder.getContext(), cirFuncName), resTy, {ops[2], ops[0]}) .getResult(); - else { - mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts); - mlir::Value intrinsicResult = emitIntrinsicCallOp( - builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]}); - return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]); - } + mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts); + mlir::Value intrinsicResult = emitIntrinsicCallOp( + builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]}); + return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]); } case X86::BI__cpuid: >From 72d3cccc1e0ea58fb662d71822e58416910eefb4 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <[email protected]> Date: Sun, 21 Dec 2025 04:10:58 +0000 Subject: [PATCH 7/7] Guard cast to cir::VectorType --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 146dc7dc2bb0a..ed63cdfb99f77 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1841,8 +1841,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { mlir::Location loc = getLoc(expr->getExprLoc()); mlir::Type resTy = convertType(expr->getType()); if (!isa<cir::VectorType>(resTy)) { - llvm::report_fatal_error( - "Expected cir::VectorType for AVX512 BF16 builtin lowering."); + return mlir::Value(); } unsigned numElts = cast<cir::VectorType>(resTy).getSize(); bool isMaskZ = false; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
