[clang] [CIR][X86] Implement lowering for pmuldq / pmuludq builtins (PR #169853)

via cfe-commits Tue, 02 Dec 2025 20:48:23 -0800

https://github.com/GeneraluseAI updated 
https://github.com/llvm/llvm-project/pull/169853


>From e725dbbfc0d46f9f0de6aab5da0d784620c2bed1 Mon Sep 17 00:00:00 2001
From: generaluseai <[email protected]>
Date: Fri, 28 Nov 2025 03:05:24 +0800
Subject: [PATCH] [CIR][X86] Implement lowering for pmuldq / pmuludq builtins

This patch adds CIR codegen support for X86 pmuldq and pmuludq operations,
covering the signed and unsigned variants across all supported vector
widths. The builtins now lower to the expected CIR representation matching
the semantics of the corresponding LLVM intrinsics.
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  52 +++++-
 .../CIR/CodeGenBuiltins/X86/avx2-builtins.c   | 158 ++++++++++++------
 .../CodeGenBuiltins/X86/avx512f-builtins.c    |  57 ++++++-
 .../CIR/CodeGenBuiltins/X86/sse2-builtins.c   |  38 ++++-
 .../CIR/CodeGenBuiltins/X86/sse41-builtins.c  |  45 +++++
 5 files changed, 286 insertions(+), 64 deletions(-)
 mode change 100644 => 100755 
clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 2d503ddb08a7d..aaff0d1d445a5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -269,6 +269,40 @@ static mlir::Value emitX86FunnelShift(CIRGenBuilderTy 
&builder,
                              mlir::ValueRange{op0, op1, amt});
 }
 
+static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
+                                bool isSigned,
+                                SmallVectorImpl<mlir::Value> &ops,
+                                unsigned opTypePrimitiveSizeInBits) {
+  mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
+                                       opTypePrimitiveSizeInBits / 64);
+  mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
+  mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
+  if (isSigned) {
+    cir::ConstantOp shiftAmt =
+        builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
+    cir::VecSplatOp shiftSplatVecOp =
+        cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
+    mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
+    // In CIR, right-shift operations are automatically lowered to either an
+    // arithmetic or logical shift depending on the operand type. The purpose
+    // of the shifts here is to propagate the sign bit of the 32-bit input
+    // into the upper bits of each vector lane.
+    lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
+    lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
+    rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
+    rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
+  } else {
+    cir::ConstantOp maskScalar = builder.getConstant(
+        loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
+    cir::VecSplatOp mask =
+        cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
+    // Clear the upper bits
+    lhs = builder.createAnd(loc, lhs, mask);
+    rhs = builder.createAnd(loc, rhs, mask);
+  }
+  return builder.createMul(loc, lhs, rhs);
+}
+
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
                                                const CallExpr *expr) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -1125,12 +1159,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_sqrtph512:
   case X86::BI__builtin_ia32_sqrtps512:
   case X86::BI__builtin_ia32_sqrtpd512:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
-  case X86::BI__builtin_ia32_pmuludq512:
+  case X86::BI__builtin_ia32_pmuludq512: {
+    unsigned opTypePrimitiveSizeInBits =
+        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ 
false,
+                        ops, opTypePrimitiveSizeInBits);
+  }
   case X86::BI__builtin_ia32_pmuldq128:
   case X86::BI__builtin_ia32_pmuldq256:
-  case X86::BI__builtin_ia32_pmuldq512:
+  case X86::BI__builtin_ia32_pmuldq512: {
+    unsigned opTypePrimitiveSizeInBits =
+        cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
+                        ops, opTypePrimitiveSizeInBits);
+  }
   case X86::BI__builtin_ia32_pternlogd512_mask:
   case X86::BI__builtin_ia32_pternlogq512_mask:
   case X86::BI__builtin_ia32_pternlogd128_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
index b7497c2053b2d..8023d2365b0cf 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -1,53 +1,105 @@
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o 
%t.cir -Wall -Werror
-// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-cir -o %t.cir -Wall -Werror
-// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
-// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-llvm -o %t.ll -Wall -Werror
-// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o 
%t.cir -Wall -Werror
-// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-cir -o %t.cir -Wall -Werror
-// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
-// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-llvm -o %t.ll -Wall -Werror
-// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
-
-// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually
-// CIR shall be able to support fully.
-
-#include <immintrin.h>
-
-__m256i test_mm256_shufflelo_epi16(__m256i a) {
-  // CIR-LABEL: _mm256_shufflelo_epi16
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s16i>) [#cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, 
#cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, 
#cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : 
!s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !s16i>
-
-  // LLVM-LABEL: test_mm256_shufflelo_epi16
-  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, 
i32 9, i32 12, i32 13, i32 14, i32 15>
-
-  // OGCG-LABEL: test_mm256_shufflelo_epi16
-  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, 
i32 9, i32 12, i32 13, i32 14, i32 15>
-  return _mm256_shufflelo_epi16(a, 83);
-}
-
-__m256i test_mm256_shufflehi_epi16(__m256i a) {
-  // CIR-LABEL: _mm256_shufflehi_epi16
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<6> : 
!s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, 
#cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<15> : !s32i, #cir.int<14> 
: !s32i, #cir.int<14> : !s32i, #cir.int<13> : !s32i] : !cir.vector<16 x !s16i>
-
-  // LLVM-LABEL: test_mm256_shufflehi_epi16
-  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, 
i32 11, i32 15, i32 14, i32 14, i32 13>
-
-  // OGCG-LABEL: test_mm256_shufflehi_epi16
-  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, 
i32 11, i32 15, i32 14, i32 14, i32 13>
-  return _mm256_shufflehi_epi16(a, 107);
-}
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o 
%t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o 
%t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+__m256i test_mm256_shufflelo_epi16(__m256i a) {
+  // CIR-LABEL: _mm256_shufflelo_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s16i>) [#cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, 
#cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, 
#cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : 
!s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !s16i>
+
+  // LLVM-LABEL: test_mm256_shufflelo_epi16
+  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, 
i32 9, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm256_shufflelo_epi16
+  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, 
i32 9, i32 12, i32 13, i32 14, i32 15>
+  return _mm256_shufflelo_epi16(a, 83);
+}
+
+__m256i test_mm256_shufflehi_epi16(__m256i a) {
+  // CIR-LABEL: _mm256_shufflehi_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<6> : 
!s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, 
#cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<15> : !s32i, #cir.int<14> 
: !s32i, #cir.int<14> : !s32i, #cir.int<13> : !s32i] : !cir.vector<16 x !s16i>
+
+  // LLVM-LABEL: test_mm256_shufflehi_epi16
+  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, 
i32 11, i32 15, i32 14, i32 14, i32 13>
+
+  // OGCG-LABEL: test_mm256_shufflehi_epi16
+  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, 
i32 11, i32 15, i32 14, i32 14, i32 13>
+  return _mm256_shufflehi_epi16(a, 107);
+}
+
+__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_mul_epu32
+  // CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
+  // CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, 
!cir.vector<4 x !s64i>
+  // CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
+  // CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
+  // CIR: [[MUL:%.*]]   = cir.binop(mul, [[AND_A]], [[AND_B]])
+
+  // LLVM-LABEL: _mm256_mul_epu32
+  // LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: mul <4 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm256_mul_epu32
+  // OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: mul <4 x i64> %{{.*}}, %{{.*}}
+
+return _mm256_mul_epu32(a, b);
+}
+
+__m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_mul_epi32
+  // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x 
!s64i>
+  // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
+  // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<4 x !s64i>
+  // CIR: [[SHL_A:%.*]]  = cir.shift(left, [[A64]] : !cir.vector<4 x !s64i>, 
[[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<4 x 
!s64i>, [[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[SHL_B:%.*]]  = cir.shift(left, [[B64]] : !cir.vector<4 x !s64i>, 
[[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<4 x 
!s64i>, [[SV]] : !cir.vector<4 x !s64i>)
+  // CIR: [[MUL:%.*]]    = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
+
+  // LLVM-LABEL: _mm256_mul_epi32
+  // LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: mul <4 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm256_mul_epi32
+  // OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: mul <4 x i64> %{{.*}}, %{{.*}}
+
+  return _mm256_mul_epi32(a, b);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
old mode 100644
new mode 100755
index a0e50ff9b4017..0a6cc8ae9adeb
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -516,7 +516,7 @@ __m512i test_mm512_ror_epi64(__m512i __A) {
   // CIR: cir.cast integral %{{.*}} : !s32i -> !u32i
   // CIR: cir.cast integral %{{.*}} : !u32i -> !u64i
   // CIR: cir.vec.splat %{{.*}} : !u64i, !cir.vector<8 x !u64i>
-  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}: (!cir.vector<8 x !s64i>, 
!cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i> 
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}: (!cir.vector<8 x !s64i>, 
!cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
 
   // LLVM-LABEL: test_mm512_ror_epi64
   // LLVM: %[[VAR:.*]] = load <8 x i64>, ptr %{{.*}}, align 64
@@ -525,5 +525,58 @@ __m512i test_mm512_ror_epi64(__m512i __A) {
   // OGCG-LABEL: test_mm512_ror_epi64
   // OGCG: %[[VAR:.*]] = load <8 x i64>, ptr %{{.*}}, align 64
   // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %[[VAR]], <8 x i64> 
%[[VAR]], <8 x i64> splat (i64 5))
-  return _mm512_ror_epi64(__A, 5); 
+  return _mm512_ror_epi64(__A, 5);
+}
+
+__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
+  // CIR-LABEL: _mm512_mul_epi32
+  // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> 
!cir.vector<8 x !s64i>
+  // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> 
!cir.vector<8 x !s64i>
+  // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
+  // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<8 x !s64i>
+  // CIR: [[SHL_A:%.*]]  = cir.shift(left, [[A64]] : !cir.vector<8 x !s64i>, 
[[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<8 x 
!s64i>, [[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[SHL_B:%.*]]  = cir.shift(left, [[B64]] : !cir.vector<8 x !s64i>, 
[[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<8 x 
!s64i>, [[SV]] : !cir.vector<8 x !s64i>)
+  // CIR: [[MUL:%.*]]    = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
+
+  // LLVM-LABEL: _mm512_mul_epi32
+  // LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: mul <8 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm512_mul_epi32
+  // OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: mul <8 x i64> %{{.*}}, %{{.*}}
+
+  return _mm512_mul_epi32(__A, __B);
+}
+
+
+__m512i test_mm512_mul_epu32(__m512i __A, __m512i __B) {
+  // CIR-LABEL: _mm512_mul_epu32
+  // CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x 
!s64i>
+  // CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x 
!s64i>
+  // CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
+  // CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, 
!cir.vector<8 x !s64i>
+  // CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
+  // CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
+  // CIR: [[MUL:%.*]]   = cir.binop(mul, [[AND_A]], [[AND_B]])
+
+  // LLVM-LABEL: _mm512_mul_epu32
+  // LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: mul <8 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm512_mul_epu32
+  // OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: mul <8 x i64> %{{.*}}, %{{.*}}
+
+return _mm512_mul_epu32(__A, __B);
 }
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
index 4bb17e9d20bc6..0559098275535 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
@@ -149,13 +149,37 @@ __m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
 }
 
 __m128i test_mm_shuffle_epi32(__m128i A) {
-       // CIR-LABEL: test_mm_shuffle_epi32
-       // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}}: !cir.vector<4 x 
!s32i>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, 
#cir.int<1> : !s32i] : !cir.vector<4 x !s32i>
+// CIR-LABEL: test_mm_shuffle_epi32
+// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}}: !cir.vector<4 x !s32i>) 
[#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : 
!s32i] : !cir.vector<4 x !s32i>
 
-    // LLVM-LABEL: test_mm_shuffle_epi32
-       // LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> 
<i32 2, i32 3, i32 0, i32 1>
+// LLVM-LABEL: test_mm_shuffle_epi32
+// LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, 
i32 3, i32 0, i32 1>
+
+// OGCG-LABEL: test_mm_shuffle_epi32
+// OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, 
i32 3, i32 0, i32 1>
+return _mm_shuffle_epi32(A, 0x4E);
+}
+
+__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
+  // CIR-LABEL: _mm_mul_epu32
+  // CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
+  // CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, 
!cir.vector<2 x !s64i>
+  // CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
+  // CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
+  // CIR: [[MUL:%.*]]   = cir.binop(mul, [[AND_A]], [[AND_B]])
+
+  // LLVM-LABEL: _mm_mul_epu32
+  // LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // LLVM: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm_mul_epu32
+  // OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // OGCG: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  return _mm_mul_epu32(A, B);
 
-       // OGCG-LABEL: test_mm_shuffle_epi32
-    // OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 
2, i32 3, i32 0, i32 1>
-    return _mm_shuffle_epi32(A, 0x4E);
 }
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c
new file mode 100644
index 0000000000000..c53d435842b27
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions 
-fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc 
-target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | 
FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions 
-fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc 
-target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | 
FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
+  // CIR-LABEL: _mm_mul_epi32
+  // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x 
!s64i>
+  // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
+  // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<2 x !s64i>
+  // CIR: [[SHL_A:%.*]]  = cir.shift(left, [[A64]] : !cir.vector<2 x !s64i>, 
[[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<2 x 
!s64i>, [[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[SHL_B:%.*]]  = cir.shift(left, [[B64]] : !cir.vector<2 x !s64i>, 
[[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<2 x 
!s64i>, [[SV]] : !cir.vector<2 x !s64i>)
+  // CIR: [[MUL:%.*]]    = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
+
+  // LLVM-LABEL: _mm_mul_epi32
+  // LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // LLVM: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  // OGCG-LABEL: _mm_mul_epi32
+  // OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // OGCG: mul <2 x i64> %{{.*}}, %{{.*}}
+
+  return _mm_mul_epi32(x, y);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][X86] Implement lowering for pmuldq / pmuludq builtins (PR #169853)

Reply via email to