Author: Vishruth Thimmaiah
Date: 2026-01-12T17:04:01-08:00
New Revision: c6fc6adb7e32bf6fd77e3e73ad74752881e131fb

URL: 
https://github.com/llvm/llvm-project/commit/c6fc6adb7e32bf6fd77e3e73ad74752881e131fb
DIFF: 
https://github.com/llvm/llvm-project/commit/c6fc6adb7e32bf6fd77e3e73ad74752881e131fb.diff

LOG: [CIR][X86] Add support for `intersect` builtins (#172554)

Adds CIR codegen support for the
`__builtin_ia32_vp2intersect_d`/`__builtin_ia32_vp2intersect_q` families of x86
builtins (128-, 256-, and 512-bit variants).

Part of #167765

---------

Signed-off-by: vishruth-thimmaiah <[email protected]>

Added: 
    clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f88e57aca6a08..cc3af713bc8c2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -264,15 +264,15 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy 
&builder, mlir::Location loc,
                              mlir::ValueRange{lhsVec, rhsVec});
 }
 
-// TODO: The cgf parameter should be removed when all the NYI cases are
-// implemented.
-static std::optional<mlir::Value>
-emitX86MaskedCompareResult(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
-                           mlir::Value cmp, unsigned numElts,
-                           mlir::Value maskIn, mlir::Location loc) {
+static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder,
+                                              mlir::Value cmp, unsigned 
numElts,
+                                              mlir::Value maskIn,
+                                              mlir::Location loc) {
   if (maskIn) {
-    cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
-    return {};
+    auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(maskIn.getDefiningOp());
+    if (!c || !c.isAllOnesValue())
+      cmp = builder.createAnd(loc, cmp,
+                              getMaskVecValue(builder, loc, maskIn, numElts));
   }
   if (numElts < 8) {
     llvm::SmallVector<mlir::Attribute> indices;
@@ -340,7 +340,7 @@ emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy 
&builder, unsigned cc,
   if (ops.size() == 4)
     maskIn = ops[3];
 
-  return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc);
+  return emitX86MaskedCompareResult(builder, cmp, numElts, maskIn, loc);
 }
 
 // TODO: The cgf parameter should be removed when all the NYI cases are
@@ -1840,12 +1840,66 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_fpclasspd128_mask:
   case X86::BI__builtin_ia32_fpclasspd256_mask:
   case X86::BI__builtin_ia32_fpclasspd512_mask:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case X86::BI__builtin_ia32_vp2intersect_q_512:
   case X86::BI__builtin_ia32_vp2intersect_q_256:
   case X86::BI__builtin_ia32_vp2intersect_q_128:
   case X86::BI__builtin_ia32_vp2intersect_d_512:
   case X86::BI__builtin_ia32_vp2intersect_d_256:
-  case X86::BI__builtin_ia32_vp2intersect_d_128:
+  case X86::BI__builtin_ia32_vp2intersect_d_128: {
+    unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    StringRef intrinsicName;
+
+    switch (builtinID) {
+    default:
+      llvm_unreachable("Unexpected builtin");
+    case X86::BI__builtin_ia32_vp2intersect_q_512:
+      intrinsicName = "x86.avx512.vp2intersect.q.512";
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_q_256:
+      intrinsicName = "x86.avx512.vp2intersect.q.256";
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_q_128:
+      intrinsicName = "x86.avx512.vp2intersect.q.128";
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_d_512:
+      intrinsicName = "x86.avx512.vp2intersect.d.512";
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_d_256:
+      intrinsicName = "x86.avx512.vp2intersect.d.256";
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_d_128:
+      intrinsicName = "x86.avx512.vp2intersect.d.128";
+      break;
+    }
+
+    auto resVector = cir::VectorType::get(builder.getBoolTy(), numElts);
+
+    cir::RecordType resRecord =
+        cir::RecordType::get(&getMLIRContext(), {resVector, resVector}, false,
+                             false, cir::RecordType::RecordKind::Struct);
+
+    mlir::Value call =
+        emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), intrinsicName,
+                            resRecord, mlir::ValueRange{ops[0], ops[1]});
+    mlir::Value result =
+        cir::ExtractMemberOp::create(builder, loc, resVector, call, 0);
+    result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, 
loc);
+    Address addr = Address(
+        ops[2], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8)));
+    builder.createStore(loc, result, addr);
+
+    result = cir::ExtractMemberOp::create(builder, loc, resVector, call, 1);
+    result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, 
loc);
+    addr = Address(ops[3],
+                   clang::CharUnits::fromQuantity(std::max(1U, numElts / 8)));
+    builder.createStore(loc, result, addr);
+    return mlir::Value{};
+  }
   case X86::BI__builtin_ia32_vpmultishiftqb128:
   case X86::BI__builtin_ia32_vpmultishiftqb256:
   case X86::BI__builtin_ia32_vpmultishiftqb512:

diff  --git 
a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c
new file mode 100644
index 0000000000000..6882d2e91961e
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect 
-target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect 
-target-feature +avx512vl -fclangir -emit-llvm -o %t.ll  -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect 
-target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect 
-target-feature +avx512vl -fclangir -emit-llvm -o %t.ll  -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect 
-target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s 
-check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect 
-target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s 
-check-prefix=OGCG
+
+#include <immintrin.h>
+
+// CIR: !rec_anon_struct = !cir.record<struct  {!cir.vector<8 x !cir.bool>, 
!cir.vector<8 x !cir.bool>}>
+// CIR: !rec_anon_struct1 = !cir.record<struct  {!cir.vector<4 x !cir.bool>, 
!cir.vector<4 x !cir.bool>}>
+// CIR: !rec_anon_struct2 = !cir.record<struct  {!cir.vector<2 x !cir.bool>, 
!cir.vector<2 x !cir.bool>}>
+void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 
*m1) {
+  // CIR-LABEL: mm256_2intersect_epi32
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic 
"x86.avx512.vp2intersect.d.256" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s32i>, 
!cir.vector<8 x !s32i>) -> !rec_anon_struct
+  // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> 
!cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+  // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> 
!cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+  // LLVM-LABEL: test_mm256_2intersect_epi32
+  // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } 
@llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+  // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0
+  // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+  // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1
+  // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+  // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+  // OGCG-LABEL: test_mm256_2intersect_epi32
+  // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } 
@llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+  // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0
+  // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+  // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1
+  // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+  // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+  _mm256_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 
*m1) {
+  // CIR-LABEL: mm256_2intersect_epi64
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic 
"x86.avx512.vp2intersect.q.256" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s64i>, 
!cir.vector<4 x !s64i>) -> !rec_anon_struct1
+  // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> 
!cir.vector<4 x !cir.bool>
+  // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+  // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : 
!cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, 
#cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : 
!s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+  // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> 
!cir.vector<4 x !cir.bool>
+  // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+  // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : 
!cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, 
#cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : 
!s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+  // LLVM-LABEL: test_mm256_2intersect_epi64
+  // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } 
@llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+  // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+  // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+  // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+  // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+  // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+  // OGCG-LABEL: test_mm256_2intersect_epi64
+  // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } 
@llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+  // OGCG: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+  // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+  // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+  // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+  // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+  _mm256_2intersect_epi64(a, b, m0, m1);
+}
+
+void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 
*m1) {
+  // CIR-LABEL: mm_2intersect_epi32
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic 
"x86.avx512.vp2intersect.d.128" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, 
!cir.vector<4 x !s32i>) -> !rec_anon_struct1
+  // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> 
!cir.vector<4 x !cir.bool>
+  // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+  // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : 
!cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, 
#cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : 
!s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+  // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> 
!cir.vector<4 x !cir.bool>
+  // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+  // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : 
!cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, 
#cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : 
!s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+  // LLVM-LABEL: test_mm_2intersect_epi32
+  // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } 
@llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+  // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+  // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+  // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+  // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+  // OGCG-LABEL: test_mm_2intersect_epi32
+  // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } 
@llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // OGCG: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+  // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+  // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+  // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+  // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+  _mm_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 
*m1) {
+  // CIR-LABEL: mm_2intersect_epi64
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic 
"x86.avx512.vp2intersect.q.128" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s64i>, 
!cir.vector<2 x !s64i>) -> !rec_anon_struct2
+  // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct2 -> 
!cir.vector<2 x !cir.bool>
+  // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool>
+  // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : 
!cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, 
#cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : 
!s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+  // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct2 -> 
!cir.vector<2 x !cir.bool>
+  // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool>
+  // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : 
!cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, 
#cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : 
!s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+  // LLVM-LABEL: test_mm_2intersect_epi64
+  // LLVM: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } 
@llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // LLVM: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0
+  // LLVM: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+  // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+  // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // LLVM: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1
+  // LLVM: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+  // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+  // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+  // OGCG-LABEL: test_mm_2intersect_epi64
+  // OGCG: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } 
@llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // OGCG: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0
+  // OGCG: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+  // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+  // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // OGCG: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1
+  // OGCG: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+  // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+  // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+  _mm_2intersect_epi64(a, b, m0, m1);
+}

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c
new file mode 100644
index 0000000000000..384477454c43e
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir 
-emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir 
-emit-llvm -o %t.ll  -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir 
-emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir 
-emit-llvm -o %t.ll  -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o 
- -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o 
- -Wall -Werror | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+
+// CIR: !rec_anon_struct = !cir.record<struct  {!cir.vector<16 x !cir.bool>, 
!cir.vector<16 x !cir.bool>}>
+// CIR: !rec_anon_struct1 = !cir.record<struct  {!cir.vector<8 x !cir.bool>, 
!cir.vector<8 x !cir.bool>}>
+void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, 
__mmask16 *m1) {
+  // CIR-LABEL: mm512_2intersect_epi32
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic 
"x86.avx512.vp2intersect.d.512" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, 
!cir.vector<16 x !s32i>) -> !rec_anon_struct
+  // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> 
!cir.vector<16 x !cir.bool>
+  // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<16 x 
!cir.bool> -> !u16i
+  // CIR: cir.store align(2) %[[CAST1]], %{{.*}} : !u16i, !cir.ptr<!u16i>
+  // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> 
!cir.vector<16 x !cir.bool>
+  // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<16 x 
!cir.bool> -> !u16i
+  // CIR: cir.store align(2) %[[CAST2]], %{{.*}} : !u16i, !cir.ptr<!u16i>
+
+  // LLVM-LABEL: test_mm512_2intersect_epi32
+  // LLVM: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } 
@llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  // LLVM: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0
+  // LLVM: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16
+  // LLVM: store i16 %[[CAST1]], ptr %{{.*}}, align 2
+  // LLVM: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 1
+  // LLVM: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16
+  // LLVM: store i16 %[[CAST2]], ptr %{{.*}}, align 2
+
+  // OGCG-LABEL: test_mm512_2intersect_epi32
+  // OGCG: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } 
@llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  // OGCG: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0
+  // OGCG: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16
+  // OGCG: store i16 %[[CAST1]], ptr %{{.*}}, align 2
+  // OGCG: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 1
+  // OGCG: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16
+  // OGCG: store i16 %[[CAST2]], ptr %{{.*}}, align 2
+  _mm512_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm512_2intersect_epi64(__m512i a, __m512i b, __mmask8 *m0, __mmask8 
*m1) {
+  // CIR-LABEL: mm512_2intersect_epi64
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic 
"x86.avx512.vp2intersect.q.512" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, 
!cir.vector<8 x !s64i>) -> !rec_anon_struct1
+  // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> 
!cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+  // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> 
!cir.vector<8 x !cir.bool>
+  // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x 
!cir.bool> -> !u8i
+  // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+  // LLVM-LABEL: test_mm512_2intersect_epi64
+  // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } 
@llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+  // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0
+  // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+  // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 1
+  // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+  // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+  // OGCG-LABEL: test_mm512_2intersect_epi64
+  // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } 
@llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+  // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0
+  // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+  // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+  // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 1
+  // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+  // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+  _mm512_2intersect_epi64(a, b, m0, m1);
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to