https://github.com/Priyanshu3820 updated 
https://github.com/llvm/llvm-project/pull/173143

>From 0dd977df6b2fda837c32044c199a13a32c232b6f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 08:25:59 +0000
Subject: [PATCH 1/7] Implement handling for convert-half builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 55 ++++++++++++-
 .../X86/avx512vlbf16-builtins.c               | 80 +++++++++++++++++++
 2 files changed, 132 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..59d467da3a9fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -362,6 +362,27 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+                                            mlir::Location loc,
+                                            mlir::Type dstTy,
+                                            SmallVectorImpl<mlir::Value> &ops) 
{
+
+  mlir::Value src = ops[0];
+  mlir::Value passthru = ops[1];
+
+  auto vecTy = mlir::cast<cir::VectorType>(src.getType());
+  uint64_t numElems = vecTy.getSize();
+
+  mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElems);
+
+  auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems);
+  mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
+
+  mlir::Value res = builder.createFloatingCast(srcF16, dstTy);
+
+  return emitX86Select(builder, loc, mask, res, passthru);
+}
+
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                 llvm::SmallVector<mlir::Value> ops,
                                 bool isSigned) {
@@ -1662,12 +1683,40 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()),
+                                    ops);
+  }
+  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value intrinsicMask = getMaskVecValue(builder, loc, ops[2], 4);
+    return emitIntrinsicCallOp(builder, loc,
+                               "x86.avx512bf16.mask.cvtneps2bf16.128",
+                               convertType(expr->getType()),
+                               mlir::ValueRange{ops[0], ops[1], 
intrinsicMask});
+  }
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    unsigned numElts = cast<cir::VectorType>(ops[1].getType()).getSize();
+    mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
+    StringRef intrinsicName;
+    if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
+    else
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
+    mlir::Value intrinsicResult =
+        emitIntrinsicCallOp(builder, loc, intrinsicName, ops[1].getType(),
+                            mlir::ValueRange{ops[0]});
+    return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
+  }
   case X86::BI__cpuid:
   case X86::BI__cpuidex:
   case X86::BI__emul:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
new file mode 100644
index 0000000000000..ccfc0d4a6a813
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl 
-target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir -Wall -Werror 
-Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl 
-target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll -Wall -Werror 
-Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl 
-target-feature +avx512bf16 -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m256bh test_mm512_mask_cvtneps_pbh(__m256bh src, __mmask16 k, __m512 a) {
+  // CIR-LABEL: @test_mm512_mask_cvtneps_pbh
+  // CIR: cir.call @_mm512_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : 
(!cir.vector<16 x !cir.bf16>, !u16i, !cir.vector<16 x !cir.float>) -> 
!cir.vector<16 x !cir.bf16>
+
+  // LLVM-LABEL: @test_mm512_mask_cvtneps_pbh
+  // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+
+  // OGCG-LABEL: @test_mm512_mask_cvtneps_pbh
+  // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+  return _mm512_mask_cvtneps_pbh(src, k, a);
+}
+
+__m256bh test_mm512_maskz_cvtneps_pbh(__mmask16 k, __m512 a) {
+  // CIR-LABEL: @test_mm512_maskz_cvtneps_pbh
+  // CIR: cir.call @_mm512_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u16i, 
!cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.bf16>
+
+  // LLVM-LABEL: @test_mm512_maskz_cvtneps_pbh
+  // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x 
float> {{.+}})
+
+  // OGCG-LABEL:  @test_mm512_maskz_cvtneps_pbh
+  // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x 
float> {{.+}})
+  return _mm512_maskz_cvtneps_pbh(k, a);
+}
+
+__m128bh test_mm256_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m256 a) {
+  // CIR-LABEL: test_mm256_mask_cvtneps_pbh
+  // CIR: cir.call @_mm256_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : 
(!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<8 x !cir.float>) -> 
!cir.vector<8 x !cir.bf16>
+  
+  // LLVM-LABEL: test_mm256_mask_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})
+
+  // OGCG-LABEL: test_mm256_mask_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})  
+  return _mm256_mask_cvtneps_pbh(src, k, a);
+}
+
+__m128bh test_mm256_maskz_cvtneps_pbh(__mmask8 k, __m256 a) {
+  // CIR-LABEL: test_mm256_maskz_cvtneps_pbh
+  // CIR: cir.call @_mm256_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, 
!cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.bf16>
+
+  // LLVM-LABEL: test_mm256_maskz_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})
+
+  // OGCG-LABEL: test_mm256_maskz_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})
+  return _mm256_maskz_cvtneps_pbh(k, a);
+}
+
+__m128bh test_mm_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m128 a) {
+  // CIR-LABEL: test_mm_mask_cvtneps_pbh
+  // CIR: cir.call @_mm_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : 
(!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<4 x !cir.float>) -> 
!cir.vector<8 x !cir.bf1{{.+}}
+
+  // LLVM-LABEL: test_mm_mask_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+
+  // OGCG-LABEL: test_mm_mask_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+  return _mm_mask_cvtneps_pbh(src, k, a);
+}
+
+__m128bh test_mm_maskz_cvtneps_pbh(__mmask8 k, __m128 a) {
+  // CIR-LABEL: test_mm_maskz_cvtneps_pbh
+  // CIR: cir.call @_mm_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, 
!cir.vector<4 x !cir.float>) -> !cir.vector<8 x !cir.bf16>
+  
+  // LLVM-LABEL: test_mm_maskz_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+
+  // OGCG-LABEL: test_mm_maskz_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+  return _mm_maskz_cvtneps_pbh(k, a);
+}

>From ed0155382bf68c99fa3a7b158407da3717a73741 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 12:21:53 +0000
Subject: [PATCH 2/7] Update CIRGenBuiltinX86.cpp

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 45 ++++++++++++++++------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 59d467da3a9fb..f27b68ca4a437 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -364,23 +364,46 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
 
 static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
                                             mlir::Location loc,
-                                            mlir::Type dstTy,
-                                            SmallVectorImpl<mlir::Value> &ops) 
{
+                                            SmallVectorImpl<mlir::Value> &ops,
+                                            mlir::Type DstTy) {
+  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+         "Unknown cvtph2ps intrinsic");
+
+  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
+  if (ops.size() == 4) {
+    cir::ConstantOp constOp = ops[3].getDefiningOp<cir::ConstantOp>();
+    if (constOp &&
+        mlir::cast<mlir::IntegerAttr>(constOp.getValue()).getInt() != 4) {
+      return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512",
+                                 DstTy, ops);
+    }
+  }
 
-  mlir::Value src = ops[0];
-  mlir::Value passthru = ops[1];
+  uint64_t NumDstElts = mlir::cast<cir::VectorType>(DstTy).getSize();
+  mlir::Value Src = ops[0];
 
-  auto vecTy = mlir::cast<cir::VectorType>(src.getType());
-  uint64_t numElems = vecTy.getSize();
+  // Extract the subvector
+  if (NumDstElts != mlir::cast<cir::VectorType>(Src.getType()).getSize()) {
+    assert(NumDstElts == 4 && "Unexpected vector size");
 
-  mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElems);
+    SmallVector<int32_t, 4> indices = {0, 1, 2, 3};
+    Src = builder.createShuffle(loc, Src, Src, indices);
+  }
 
-  auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems);
-  mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
+  // Bitcast from vXi16 to vXf16.
+  cir::VectorType HalfTy =
+      cir::VectorType::get(builder.getF16Type(), NumDstElts);
+  Src = builder.createBitcast(loc, Src, HalfTy);
 
-  mlir::Value res = builder.createFloatingCast(srcF16, dstTy);
+  // Perform the fp-extension.
+  mlir::Value Res = builder.createFloatingCast(Src, DstTy);
+
+  if (ops.size() >= 3) {
+    mlir::Value MaskVec = getMaskVecValue(builder, loc, ops[2], NumDstElts);
+    Res = emitX86Select(builder, loc, MaskVec, Res, ops[1]);
+  }
 
-  return emitX86Select(builder, loc, mask, res, passthru);
+  return Res;
 }
 
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,

>From 50e380f9aef07dc225e37147fa80da57c3839e56 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 19:16:13 +0000
Subject: [PATCH 3/7] Update CIRGenBuiltinX86.cpp

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 78 ++++------------------
 1 file changed, 13 insertions(+), 65 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f27b68ca4a437..7862119d659f8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -19,8 +19,9 @@
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
-#include "clang/CIR/MissingFeatures.h"
+
 #include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -362,50 +363,6 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
-static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
-                                            mlir::Location loc,
-                                            SmallVectorImpl<mlir::Value> &ops,
-                                            mlir::Type DstTy) {
-  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
-         "Unknown cvtph2ps intrinsic");
-
-  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
-  if (ops.size() == 4) {
-    cir::ConstantOp constOp = ops[3].getDefiningOp<cir::ConstantOp>();
-    if (constOp &&
-        mlir::cast<mlir::IntegerAttr>(constOp.getValue()).getInt() != 4) {
-      return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512",
-                                 DstTy, ops);
-    }
-  }
-
-  uint64_t NumDstElts = mlir::cast<cir::VectorType>(DstTy).getSize();
-  mlir::Value Src = ops[0];
-
-  // Extract the subvector
-  if (NumDstElts != mlir::cast<cir::VectorType>(Src.getType()).getSize()) {
-    assert(NumDstElts == 4 && "Unexpected vector size");
-
-    SmallVector<int32_t, 4> indices = {0, 1, 2, 3};
-    Src = builder.createShuffle(loc, Src, Src, indices);
-  }
-
-  // Bitcast from vXi16 to vXf16.
-  cir::VectorType HalfTy =
-      cir::VectorType::get(builder.getF16Type(), NumDstElts);
-  Src = builder.createBitcast(loc, Src, HalfTy);
-
-  // Perform the fp-extension.
-  mlir::Value Res = builder.createFloatingCast(Src, DstTy);
-
-  if (ops.size() >= 3) {
-    mlir::Value MaskVec = getMaskVecValue(builder, loc, ops[2], NumDstElts);
-    Res = emitX86Select(builder, loc, MaskVec, Res, ops[1]);
-  }
-
-  return Res;
-}
-
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                 llvm::SmallVector<mlir::Value> ops,
                                 bool isSigned) {
@@ -1706,38 +1663,29 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
+  case X86::BI__builtin_ia32_vcvtph2ps_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
     cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented X86 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
-  case X86::BI__builtin_ia32_vcvtph2ps_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()),
-                                    ops);
-  }
-  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    mlir::Value intrinsicMask = getMaskVecValue(builder, loc, ops[2], 4);
-    return emitIntrinsicCallOp(builder, loc,
-                               "x86.avx512bf16.mask.cvtneps2bf16.128",
-                               convertType(expr->getType()),
-                               mlir::ValueRange{ops[0], ops[1], 
intrinsicMask});
-  }
+  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
     mlir::Location loc = getLoc(expr->getExprLoc());
-    unsigned numElts = cast<cir::VectorType>(ops[1].getType()).getSize();
+    mlir::Type resTy = convertType(expr->getType());
+    unsigned numElts = cast<cir::VectorType>(resTy).getSize();
     mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
     StringRef intrinsicName;
-    if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
+    if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_128_mask)
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.128";
+    else if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
       intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
     else
       intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
-    mlir::Value intrinsicResult =
-        emitIntrinsicCallOp(builder, loc, intrinsicName, ops[1].getType(),
-                            mlir::ValueRange{ops[0]});
+    mlir::Value intrinsicResult = emitIntrinsicCallOp(
+        builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]});
     return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
   }
   case X86::BI__cpuid:

>From b238c17d5d303e2910557b578503aa5ec5fcf2b3 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 19:18:30 +0000
Subject: [PATCH 4/7] Update CIRGenBuiltinX86.cpp

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 7862119d659f8..810b027fdb33d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -19,9 +19,8 @@
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
-
+#include "clang/CIR/MissingFeatures.h"
 #include "llvm/Support/ErrorHandling.h"
-#include <cstdint>
 
 using namespace clang;
 using namespace clang::CIRGen;

>From 9d3a326e95d0efe48bb3fdad1d2157106e0fa749 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 20:01:12 +0000
Subject: [PATCH 5/7] add support for maskz builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 45 ++++++++++++++++++----
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 810b027fdb33d..cea4ef1b91275 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1675,18 +1675,49 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
     mlir::Location loc = getLoc(expr->getExprLoc());
     mlir::Type resTy = convertType(expr->getType());
     unsigned numElts = cast<cir::VectorType>(resTy).getSize();
-    mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
+    bool isMaskZ = false;
+    if (auto *callExpr = llvm::dyn_cast<clang::CallExpr>(expr)) {
+      if (auto *vecInit =
+              llvm::dyn_cast<clang::InitListExpr>(callExpr->getArg(0))) {
+        isMaskZ =
+            vecInit->getNumInits() == numElts &&
+            llvm::all_of(llvm::seq<unsigned>(0, numElts), [&](unsigned i) {
+              auto *init = vecInit->getInit(i);
+              if (auto *intLit = llvm::dyn_cast<clang::IntegerLiteral>(init))
+                return intLit->getValue().isZero();
+              if (auto *floatLit = 
llvm::dyn_cast<clang::FloatingLiteral>(init))
+                return floatLit->getValue().isZero();
+              return false;
+            });
+      }
+    }
     StringRef intrinsicName;
+    StringRef cirFuncName;
     if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_128_mask)
-      intrinsicName = "x86.avx512bf16.cvtneps2bf16.128";
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.128",
+      cirFuncName = isMaskZ ? "_mm_maskz_cvtneps_pbh" : "_mm_mask_cvtneps_pbh";
     else if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
-      intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.256",
+      cirFuncName =
+          isMaskZ ? "_mm256_maskz_cvtneps_pbh" : "_mm256_mask_cvtneps_pbh";
     else
-      intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
-    mlir::Value intrinsicResult = emitIntrinsicCallOp(
-        builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]});
-    return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.512",
+      cirFuncName =
+          isMaskZ ? "_mm512_maskz_cvtneps_pbh" : "_mm512_mask_cvtneps_pbh";
+    if (isMaskZ)
+      return builder
+          .createCallOp(
+              loc, mlir::SymbolRefAttr::get(builder.getContext(), cirFuncName),
+              resTy, {ops[2], ops[0]})
+          .getResult();
+    else {
+      mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
+      mlir::Value intrinsicResult = emitIntrinsicCallOp(
+          builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]});
+      return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
+    }
   }
+
   case X86::BI__cpuid:
   case X86::BI__cpuidex:
   case X86::BI__emul:

>From d2118e69640c48c9b0211b4a7a2abb3d437e5fc5 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 20:34:04 +0000
Subject: [PATCH 6/7] Update CIRGenBuiltinX86.cpp

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index cea4ef1b91275..f349b795f4e06 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -19,7 +19,6 @@
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
-#include "clang/CIR/MissingFeatures.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace clang;
@@ -1674,6 +1673,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
     mlir::Location loc = getLoc(expr->getExprLoc());
     mlir::Type resTy = convertType(expr->getType());
+    if (!isa<cir::VectorType>(resTy)) {
+      llvm::report_fatal_error(
+          "Expected cir::VectorType for AVX512 BF16 builtin lowering.");
+    }
     unsigned numElts = cast<cir::VectorType>(resTy).getSize();
     bool isMaskZ = false;
     if (auto *callExpr = llvm::dyn_cast<clang::CallExpr>(expr)) {
@@ -1710,12 +1713,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
               loc, mlir::SymbolRefAttr::get(builder.getContext(), cirFuncName),
               resTy, {ops[2], ops[0]})
           .getResult();
-    else {
-      mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
-      mlir::Value intrinsicResult = emitIntrinsicCallOp(
-          builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]});
-      return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
-    }
+    mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
+    mlir::Value intrinsicResult = emitIntrinsicCallOp(
+        builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]});
+    return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
   }
 
   case X86::BI__cpuid:

>From 72d3cccc1e0ea58fb662d71822e58416910eefb4 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sun, 21 Dec 2025 04:10:58 +0000
Subject: [PATCH 7/7] Guard cast to cir::VectorType

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 146dc7dc2bb0a..ed63cdfb99f77 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1841,8 +1841,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
     mlir::Location loc = getLoc(expr->getExprLoc());
     mlir::Type resTy = convertType(expr->getType());
     if (!isa<cir::VectorType>(resTy)) {
-      llvm::report_fatal_error(
-          "Expected cir::VectorType for AVX512 BF16 builtin lowering.");
+      return mlir::Value();
     }
     unsigned numElts = cast<cir::VectorType>(resTy).getSize();
     bool isMaskZ = false;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to