https://github.com/GeneraluseAI updated 
https://github.com/llvm/llvm-project/pull/169853

>From 68d72304eebad3ae27d415d1c3d56fd661a99755 Mon Sep 17 00:00:00 2001
From: generaluseai <[email protected]>
Date: Fri, 28 Nov 2025 03:05:24 +0800
Subject: [PATCH] [CIR][X86] Implement lowering for pmuldq / pmuludq builtins

This patch adds CIR codegen support for X86 pmuldq and pmuludq operations,
covering the signed and unsigned variants across all supported vector
widths. The builtins now lower to the expected CIR representation matching
the semantics of the corresponding LLVM intrinsics.
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 52 +++++++++++++++++-
 .../CIR/CodeGenBuiltins/X86/avx2-builtins.c   | 53 +++++++++++++++++++
 .../CodeGenBuiltins/X86/avx512f-builtins.c    | 53 +++++++++++++++++++
 .../CIR/CodeGenBuiltins/X86/sse2-builtins.c   | 23 ++++++++
 .../CIR/CodeGenBuiltins/X86/sse41-builtins.c  | 45 ++++++++++++++++
 5 files changed, 224 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 2d503ddb08a7d..aaff0d1d445a5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -269,6 +269,40 @@ static mlir::Value emitX86FunnelShift(CIRGenBuilderTy 
&builder,
                              mlir::ValueRange{op0, op1, amt});
 }
 
+static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
+                                bool isSigned,
+                                SmallVectorImpl<mlir::Value> &ops,
+                                unsigned opTypePrimitiveSizeInBits) {
+  mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
+                                       opTypePrimitiveSizeInBits / 64);
+  mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
+  mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
+  if (isSigned) {
+    cir::ConstantOp shiftAmt =
+        builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
+    cir::VecSplatOp shiftSplatVecOp =
+        cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
+    mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
+    // In CIR, right-shift operations are automatically lowered to either an
+    // arithmetic or logical shift depending on the operand type. The purpose
+    // of the shifts here is to propagate the sign bit of the 32-bit input
+    // into the upper bits of each vector lane.
+    lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
+    lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
+    rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
+    rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
+  } else {
+    cir::ConstantOp maskScalar = builder.getConstant(
+        loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
+    cir::VecSplatOp mask =
+        cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
+    // Clear the upper bits
+    lhs = builder.createAnd(loc, lhs, mask);
+    rhs = builder.createAnd(loc, rhs, mask);
+  }
+  return builder.createMul(loc, lhs, rhs);
+}
+
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
                                                const CallExpr *expr) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -1125,12 +1159,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_sqrtph512:
   case X86::BI__builtin_ia32_sqrtps512:
   case X86::BI__builtin_ia32_sqrtpd512:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
-  case X86::BI__builtin_ia32_pmuludq512:
+  case X86::BI__builtin_ia32_pmuludq512: {
+    unsigned opTypePrimitiveSizeInBits =
+        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ 
false,
+                        ops, opTypePrimitiveSizeInBits);
+  }
   case X86::BI__builtin_ia32_pmuldq128:
   case X86::BI__builtin_ia32_pmuldq256:
-  case X86::BI__builtin_ia32_pmuldq512:
+  case X86::BI__builtin_ia32_pmuldq512: {
+    unsigned opTypePrimitiveSizeInBits =
+        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
+                        ops, opTypePrimitiveSizeInBits);
+  }
   case X86::BI__builtin_ia32_pternlogd512_mask:
   case X86::BI__builtin_ia32_pternlogq512_mask:
   case X86::BI__builtin_ia32_pternlogd128_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
index b7497c2053b2d..632c80e8abfbc 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -51,3 +51,56 @@ __m256i test_mm256_shufflehi_epi16(__m256i a) {
   // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, 
i32 11, i32 15, i32 14, i32 14, i32 13>
   return _mm256_shufflehi_epi16(a, 107);
 }
+
+
+__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_mul_epu32
+  // CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
+  // CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, 
!cir.vector<4 x !s64i>
+  // CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
+  // CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
+  // CIR: [[MUL:%.*]]   = cir.binop(mul, [[AND_A]], [[AND_B]])
+
+  // LLVM-LABEL: _mm256_mul_epu32
+  // LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: mul <4 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm256_mul_epu32
+  // OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: mul <4 x i64> %{{.*}}, %{{.*}}
+
+return _mm256_mul_epu32(a, b);
+}
+
+__m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_mul_epi32
+  // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
+  // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<4 x !s64i>
+  // CIR: [[SHL_A:%.*]]  = cir.shift(left, [[A64]] : !cir.vector<4 x !s64i>, 
[[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<4 x 
!s64i>, [[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[SHL_B:%.*]]  = cir.shift(left, [[B64]] : !cir.vector<4 x !s64i>, 
[[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<4 x 
!s64i>, [[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[MUL:%.*]]    = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
+
+  // LLVM-LABEL: _mm256_mul_epi32
+  // LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: mul <4 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm256_mul_epi32
+  // OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: mul <4 x i64> %{{.*}}, %{{.*}}
+
+  return _mm256_mul_epi32(a, b);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index a0e50ff9b4017..dd91508ef19c4 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -527,3 +527,56 @@ __m512i test_mm512_ror_epi64(__m512i __A) {
   // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %[[VAR]], <8 x i64> 
%[[VAR]], <8 x i64> splat (i64 5))
   return _mm512_ror_epi64(__A, 5); 
 }
+
+__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
+  // CIR-LABEL: _mm512_mul_epi32
+  // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> 
!cir.vector<8 x !s64i>
+  // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> 
!cir.vector<8 x !s64i>
+  // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
+  // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<8 x !s64i>
+  // CIR: [[SHL_A:%.*]]  = cir.shift(left, [[A64]] : !cir.vector<8 x !s64i>, 
[[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<8 x 
!s64i>, [[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[SHL_B:%.*]]  = cir.shift(left, [[B64]] : !cir.vector<8 x !s64i>, 
[[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<8 x 
!s64i>, [[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[MUL:%.*]]    = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
+
+  // LLVM-LABEL: _mm512_mul_epi32
+  // LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: mul <8 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm512_mul_epi32
+  // OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: mul <8 x i64> %{{.*}}, %{{.*}}
+
+  return _mm512_mul_epi32(__A, __B);
+}
+
+
+__m512i test_mm512_mul_epu32(__m512i __A, __m512i __B) {
+  // CIR-LABEL: _mm512_mul_epu32
+  // CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x 
!s64i>
+  // CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x 
!s64i>
+  // CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
+  // CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, 
!cir.vector<8 x !s64i>
+  // CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
+  // CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
+  // CIR: [[MUL:%.*]]   = cir.binop(mul, [[AND_A]], [[AND_B]])
+
+  // LLVM-LABEL: _mm512_mul_epu32
+  // LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: mul <8 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm512_mul_epu32
+  // OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: mul <8 x i64> %{{.*}}, %{{.*}}
+
+return _mm512_mul_epu32(__A, __B);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
index 4bb17e9d20bc6..18cf553a3827b 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
@@ -159,3 +159,26 @@ __m128i test_mm_shuffle_epi32(__m128i A) {
     // OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 
2, i32 3, i32 0, i32 1>
     return _mm_shuffle_epi32(A, 0x4E);
 }
+
+__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
+  // CIR-LABEL: _mm_mul_epu32
+  // CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
+  // CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, 
!cir.vector<2 x !s64i>
+  // CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
+  // CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
+  // CIR: [[MUL:%.*]]   = cir.binop(mul, [[AND_A]], [[AND_B]])
+
+  // LLVM-LABEL: _mm_mul_epu32
+  // LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm_mul_epu32
+  // OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  return _mm_mul_epu32(A, B);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c
new file mode 100644
index 0000000000000..c53d435842b27
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions 
-fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc 
-target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | 
FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions 
-fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc 
-target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | 
FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
+  // CIR-LABEL: _mm_mul_epi32
+  // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
+  // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<2 x !s64i>
+  // CIR: [[SHL_A:%.*]]  = cir.shift(left, [[A64]] : !cir.vector<2 x !s64i>, 
[[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<2 x 
!s64i>, [[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[SHL_B:%.*]]  = cir.shift(left, [[B64]] : !cir.vector<2 x !s64i>, 
[[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<2 x 
!s64i>, [[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[MUL:%.*]]    = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
+
+  // LLVM-LABEL: _mm_mul_epi32
+  // LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm_mul_epi32
+  // OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  return _mm_mul_epi32(x, y);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to