[clang] [CIR] Upstream CIR codegen for blend x86 builtins (PR #174236)

2026-01-14 Thread Thibault Monnier via cfe-commits

https://github.com/Thibault-Monnier closed 
https://github.com/llvm/llvm-project/pull/174236
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [CIR] Upstream CIR codegen for blend x86 builtins (PR #174236)

2026-01-14 Thread Thibault Monnier via cfe-commits

https://github.com/Thibault-Monnier updated 
https://github.com/llvm/llvm-project/pull/174236

>From 855251d9f52f293aa18564f1b8f513728bf6a409 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier 
Date: Fri, 2 Jan 2026 20:25:17 +0100
Subject: [PATCH 1/2] Upstream CIR codegen for x86 blend builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp| 22 +--
 .../CIR/CodeGenBuiltins/X86/avx-builtins.c| 24 
 .../CIR/CodeGenBuiltins/X86/avx2-builtins.c   | 42 ++
 .../CIR/CodeGenBuiltins/X86/sse41-builtins.c  | 57 +--
 4 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..46de5bb5b9036 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1275,6 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
+cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+return mlir::Value{};
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
@@ -1282,11 +1286,19 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_blendps256:
   case X86::BI__builtin_ia32_pblendw256:
   case X86::BI__builtin_ia32_pblendd128:
-  case X86::BI__builtin_ia32_pblendd256:
-cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
-return mlir::Value{};
+  case X86::BI__builtin_ia32_pblendd256: {
+uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+int64_t indices[16];
+// If there are more than 8 elements, the immediate is used twice so make
+// sure we handle that.
+for (unsigned i = 0; i != numElts; ++i)
+  indices[i] = ((imm >> (i % 8)) & 0x1) ? numElts + i : i;
+
+return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+ArrayRef(indices, numElts));
+  }
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
index 0de782701ddc4..01ca55994ce50 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
@@ -75,6 +75,30 @@ __m256i test_mm256_undefined_si256(void) {
   return _mm256_undefined_si256();
 }
 
+__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
+  // CIR-LABEL: test_mm256_blend_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x 
!cir.double>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: test_mm256_blend_pd
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+
+  // OGCG-LABEL: test_mm256_blend_pd
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+  return _mm256_blend_pd(A, B, 0x05);
+}
+
+__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
+  // CIR-LABEL: test_mm256_blend_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.float>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, 
#cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: test_mm256_blend_ps
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_blend_ps
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_ps(A, B, 0x35);
+}
+
 __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
   // CIR-LABEL: test_mm256_insertf128_pd
   // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
index d4ed43ca1d26b..d3e54920ef186 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -58,6 +58,48 @@ __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
   return _mm256_inserti128_si256(a, b, 1);
 }
 
+__m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_blend_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}

[clang] [CIR] Upstream CIR codegen for blend x86 builtins (PR #174236)

2026-01-07 Thread Thibault Monnier via cfe-commits


@@ -1275,18 +1275,30 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
+cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+return mlir::Value{};
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
   case X86::BI__builtin_ia32_blendpd256:
   case X86::BI__builtin_ia32_blendps256:
   case X86::BI__builtin_ia32_pblendw256:
   case X86::BI__builtin_ia32_pblendd128:
-  case X86::BI__builtin_ia32_pblendd256:
-cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
-return mlir::Value{};
+  case X86::BI__builtin_ia32_pblendd256: {
+uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+int64_t indices[16];

Thibault-Monnier wrote:

Done, thanks.

https://github.com/llvm/llvm-project/pull/174236
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [CIR] Upstream CIR codegen for blend x86 builtins (PR #174236)

2026-01-07 Thread Thibault Monnier via cfe-commits

https://github.com/Thibault-Monnier updated 
https://github.com/llvm/llvm-project/pull/174236

>From 855251d9f52f293aa18564f1b8f513728bf6a409 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier 
Date: Fri, 2 Jan 2026 20:25:17 +0100
Subject: [PATCH 1/2] Upstream CIR codegen for x86 blend builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp| 22 +--
 .../CIR/CodeGenBuiltins/X86/avx-builtins.c| 24 
 .../CIR/CodeGenBuiltins/X86/avx2-builtins.c   | 42 ++
 .../CIR/CodeGenBuiltins/X86/sse41-builtins.c  | 57 +--
 4 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..46de5bb5b9036 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1275,6 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
+cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+return mlir::Value{};
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
@@ -1282,11 +1286,19 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_blendps256:
   case X86::BI__builtin_ia32_pblendw256:
   case X86::BI__builtin_ia32_pblendd128:
-  case X86::BI__builtin_ia32_pblendd256:
-cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
-return mlir::Value{};
+  case X86::BI__builtin_ia32_pblendd256: {
+uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+int64_t indices[16];
+// If there are more than 8 elements, the immediate is used twice so make
+// sure we handle that.
+for (unsigned i = 0; i != numElts; ++i)
+  indices[i] = ((imm >> (i % 8)) & 0x1) ? numElts + i : i;
+
+return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+ArrayRef(indices, numElts));
+  }
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
index 0de782701ddc4..01ca55994ce50 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
@@ -75,6 +75,30 @@ __m256i test_mm256_undefined_si256(void) {
   return _mm256_undefined_si256();
 }
 
+__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
+  // CIR-LABEL: test_mm256_blend_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x 
!cir.double>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: test_mm256_blend_pd
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+
+  // OGCG-LABEL: test_mm256_blend_pd
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+  return _mm256_blend_pd(A, B, 0x05);
+}
+
+__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
+  // CIR-LABEL: test_mm256_blend_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.float>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, 
#cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: test_mm256_blend_ps
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_blend_ps
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_ps(A, B, 0x35);
+}
+
 __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
   // CIR-LABEL: test_mm256_insertf128_pd
   // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
index d4ed43ca1d26b..d3e54920ef186 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -58,6 +58,48 @@ __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
   return _mm256_inserti128_si256(a, b, 1);
 }
 
+__m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_blend_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}

[clang] [CIR] Upstream CIR codegen for blend x86 builtins (PR #174236)

2026-01-05 Thread Andy Kaylor via cfe-commits


@@ -1275,18 +1275,30 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
+cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+return mlir::Value{};
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
   case X86::BI__builtin_ia32_blendpd256:
   case X86::BI__builtin_ia32_blendps256:
   case X86::BI__builtin_ia32_pblendw256:
   case X86::BI__builtin_ia32_pblendd128:
-  case X86::BI__builtin_ia32_pblendd256:
-cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
-return mlir::Value{};
+  case X86::BI__builtin_ia32_pblendd256: {
+uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+int64_t indices[16];

andykaylor wrote:

Rather than populating an array of integers here, can you create a 
`SmallVector` of `mlir::Attribute` and populate it with `cir::IntAttr` created 
based on the value at line 1297? That avoids any concerns about overflowing the 
number of elements, and it saves `createVecShuffle` from having to create such 
a vector of attributes.

https://github.com/llvm/llvm-project/pull/174236
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [CIR] Upstream CIR codegen for blend x86 builtins (PR #174236)

2026-01-02 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Thibault Monnier (Thibault-Monnier)


Changes

Part of https://github.com/llvm/llvm-project/issues/167752.

---
Full diff: https://github.com/llvm/llvm-project/pull/174236.diff


4 Files Affected:

- (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp (+17-5) 
- (modified) clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c (+24) 
- (modified) clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c (+42) 
- (modified) clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c (+52-5) 


```diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..46de5bb5b9036 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1275,6 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
+cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+return mlir::Value{};
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
@@ -1282,11 +1286,19 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_blendps256:
   case X86::BI__builtin_ia32_pblendw256:
   case X86::BI__builtin_ia32_pblendd128:
-  case X86::BI__builtin_ia32_pblendd256:
-cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
-return mlir::Value{};
+  case X86::BI__builtin_ia32_pblendd256: {
+uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+int64_t indices[16];
+// If there are more than 8 elements, the immediate is used twice so make
+// sure we handle that.
+for (unsigned i = 0; i != numElts; ++i)
+  indices[i] = ((imm >> (i % 8)) & 0x1) ? numElts + i : i;
+
+return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+ArrayRef(indices, numElts));
+  }
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
index 0de782701ddc4..01ca55994ce50 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
@@ -75,6 +75,30 @@ __m256i test_mm256_undefined_si256(void) {
   return _mm256_undefined_si256();
 }
 
+__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
+  // CIR-LABEL: test_mm256_blend_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x 
!cir.double>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: test_mm256_blend_pd
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+
+  // OGCG-LABEL: test_mm256_blend_pd
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+  return _mm256_blend_pd(A, B, 0x05);
+}
+
+__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
+  // CIR-LABEL: test_mm256_blend_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.float>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, 
#cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: test_mm256_blend_ps
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_blend_ps
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_ps(A, B, 0x35);
+}
+
 __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
   // CIR-LABEL: test_mm256_insertf128_pd
   // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
index d4ed43ca1d26b..d3e54920ef186 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -58,6 +58,48 @@ __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
   return _mm256_inserti128_si256(a, b, 1);
 }
 
+__m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_blend_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<1

[clang] [CIR] Upstream CIR codegen for blend x86 builtins (PR #174236)

2026-01-02 Thread Thibault Monnier via cfe-commits

https://github.com/Thibault-Monnier created 
https://github.com/llvm/llvm-project/pull/174236

Part of https://github.com/llvm/llvm-project/issues/167752.

>From 855251d9f52f293aa18564f1b8f513728bf6a409 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier 
Date: Fri, 2 Jan 2026 20:25:17 +0100
Subject: [PATCH] Upstream CIR codegen for x86 blend builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp| 22 +--
 .../CIR/CodeGenBuiltins/X86/avx-builtins.c| 24 
 .../CIR/CodeGenBuiltins/X86/avx2-builtins.c   | 42 ++
 .../CIR/CodeGenBuiltins/X86/sse41-builtins.c  | 57 +--
 4 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..46de5bb5b9036 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1275,6 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
+cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+return mlir::Value{};
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
@@ -1282,11 +1286,19 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_blendps256:
   case X86::BI__builtin_ia32_pblendw256:
   case X86::BI__builtin_ia32_pblendd128:
-  case X86::BI__builtin_ia32_pblendd256:
-cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
-return mlir::Value{};
+  case X86::BI__builtin_ia32_pblendd256: {
+uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+int64_t indices[16];
+// If there are more than 8 elements, the immediate is used twice so make
+// sure we handle that.
+for (unsigned i = 0; i != numElts; ++i)
+  indices[i] = ((imm >> (i % 8)) & 0x1) ? numElts + i : i;
+
+return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+ArrayRef(indices, numElts));
+  }
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
index 0de782701ddc4..01ca55994ce50 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
@@ -75,6 +75,30 @@ __m256i test_mm256_undefined_si256(void) {
   return _mm256_undefined_si256();
 }
 
+__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
+  // CIR-LABEL: test_mm256_blend_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x 
!cir.double>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: test_mm256_blend_pd
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+
+  // OGCG-LABEL: test_mm256_blend_pd
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 4, i32 1, i32 6, i32 3>
+  return _mm256_blend_pd(A, B, 0x05);
+}
+
+__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
+  // CIR-LABEL: test_mm256_blend_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.float>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, 
#cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: test_mm256_blend_ps
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_blend_ps
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_ps(A, B, 0x35);
+}
+
 __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
   // CIR-LABEL: test_mm256_insertf128_pd
   // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
index d4ed43ca1d26b..d3e54920ef186 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -58,6 +58,48 @@ __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
   return _mm256_inserti128_si256(a, b, 1);
 }
 
+__m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_blend_e