https://github.com/vishruth-thimmaiah updated https://github.com/llvm/llvm-project/pull/172554
>From 1e7f8fe0fdfb12498439a36e09448e700a3a5ab9 Mon Sep 17 00:00:00 2001 From: vishruth-thimmaiah <[email protected]> Date: Wed, 17 Dec 2025 02:28:34 +0530 Subject: [PATCH 1/4] [CIR][X86] Add support for intersect builtins adds support for the `__builtin_ia32_vp2intersect_d`/`__builtin_ia32_vp2intersect_q` x86 builtins Signed-off-by: vishruth-thimmaiah <[email protected]> --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 52 ++++++ clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 83 +++++++++- .../X86/avx512vlvp2intersect-builtins.c | 155 ++++++++++++++++++ .../X86/avx512vp2intersect-builtins.c | 77 +++++++++ 4 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 50798be64551a..41e541d3ff3ef 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2834,6 +2834,58 @@ def CIR_GetElementOp : CIR_Op<"get_element", [ }]; } +//===----------------------------------------------------------------------===// +// GetMemberValueOp +//===----------------------------------------------------------------------===// + +def CIR_GetMemberValueOp : CIR_Op<"get_member_value"> { + let summary = "Get the value of a member of a record"; + let description = [{ + The `cir.get_member_value` operation gets the value of a particular named + member from the input record. + + It expects the record object as well as the name of the member and its field + index. + + Example: + ```mlir + // Suppose we have a record with multiple members. + !s32i = !cir.int<s, 32> + !s8i = !cir.int<s, 8> + !ty_B = !cir.record<"struct.B" {!s32i, !s8i}> + + // Get the value of the member at index 1. 
+ %1 = cir.get_member_value %0[1] : (!ty_B) -> !s8i + ``` + }]; + + let arguments = (ins + CIR_RecordType:$rec, + IndexAttr:$index_attr); + + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + $rec `[` $index_attr `]` attr-dict + `:` qualified(type($rec)) `->` qualified(type($result)) + }]; + + let builders = [ + OpBuilder<(ins "mlir::Type":$result, + "mlir::Value":$rec, + "unsigned":$index), + [{ + mlir::APInt fieldIdx(64, index); + build($_builder, $_state, result, rec, fieldIdx); + }]> + ]; + + let extraClassDeclaration = [{ + /// Return the index of the record member being accessed. + uint64_t getIndex() { return getIndexAttr().getZExtValue(); } + }]; +} + //===----------------------------------------------------------------------===// // FuncOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 8bf3e63ad9179..7d504b90da7ce 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -542,6 +542,31 @@ static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, return builder.createVecCompare(loc, pred, op0, op1); } +static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder, + mlir::Location loc, + mlir::Value cmp, unsigned numElts, + mlir::Value maskIn) { + if (maskIn) { + auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(maskIn.getDefiningOp()); + if (!c || !c.isAllOnesValue()) + cmp = builder.createAnd(loc, cmp, + getMaskVecValue(builder, loc, maskIn, numElts)); + } + + if (numElts < 8) { + llvm::SmallVector<mlir::Attribute, 8> indices; + mlir::Type i32Ty = builder.getSInt32Ty(); + for (unsigned i = 0; i != numElts; ++i) + indices.push_back(cir::IntAttr::get(i32Ty, i)); + for (unsigned i = numElts; i != 8; ++i) + indices.push_back(cir::IntAttr::get(i32Ty, i % numElts + numElts)); + cmp = builder.createVecShuffle( + loc, cmp, 
builder.getNullValue(cmp.getType(), loc), indices); + } + + return builder.createBitcast(cmp, builder.getUIntNTy(std::max(numElts, 8U))); +} + std::optional<mlir::Value> CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { if (builtinID == Builtin::BI__builtin_cpu_is) { @@ -1835,12 +1860,68 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; case X86::BI__builtin_ia32_vp2intersect_q_512: case X86::BI__builtin_ia32_vp2intersect_q_256: case X86::BI__builtin_ia32_vp2intersect_q_128: case X86::BI__builtin_ia32_vp2intersect_d_512: case X86::BI__builtin_ia32_vp2intersect_d_256: - case X86::BI__builtin_ia32_vp2intersect_d_128: + case X86::BI__builtin_ia32_vp2intersect_d_128: { + unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize(); + mlir::Location loc = getLoc(expr->getExprLoc()); + StringRef intrinsicName; + + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_vp2intersect_q_512: + intrinsicName = "x86.avx512.vp2intersect.q.512"; + break; + case X86::BI__builtin_ia32_vp2intersect_q_256: + intrinsicName = "x86.avx512.vp2intersect.q.256"; + break; + case X86::BI__builtin_ia32_vp2intersect_q_128: + intrinsicName = "x86.avx512.vp2intersect.q.128"; + break; + case X86::BI__builtin_ia32_vp2intersect_d_512: + intrinsicName = "x86.avx512.vp2intersect.d.512"; + break; + case X86::BI__builtin_ia32_vp2intersect_d_256: + intrinsicName = "x86.avx512.vp2intersect.d.256"; + break; + case X86::BI__builtin_ia32_vp2intersect_d_128: + intrinsicName = "x86.avx512.vp2intersect.d.128"; + break; + } + + auto resVector = cir::VectorType::get(builder.getBoolTy(), numElts); + 
llvm::SmallVector<mlir::Type, 2> resultTypes = {resVector, resVector}; + + cir::RecordType resRecord = + cir::RecordType::get(&getMLIRContext(), resultTypes, false, false, + cir::RecordType::RecordKind::Struct); + + llvm::SmallVector<mlir::Value, 2> callOps = {ops[0], ops[1]}; + + mlir::Value call = emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + intrinsicName, resRecord, callOps); + mlir::Value result = + cir::GetMemberValueOp::create(builder, loc, resVector, call, 0); + result = emitX86MaskedCompareResult(builder, loc, result, numElts, nullptr); + Address addr = Address( + ops[2], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); + builder.createStore(loc, result, addr); + + result = cir::GetMemberValueOp::create(builder, loc, resVector, call, 1); + result = emitX86MaskedCompareResult(builder, loc, result, numElts, nullptr); + addr = Address(ops[3], + clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); + builder.createStore(loc, result, addr); + return mlir::Value{}; + } case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c new file mode 100644 index 0000000000000..a07da240727a6 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c @@ -0,0 +1,155 @@ +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM 
--input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG + +#include <immintrin.h> + +// CIR: !rec_anon_struct = !cir.record<struct {!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.bool>}> +// CIR: !rec_anon_struct1 = !cir.record<struct {!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.bool>}> +// CIR: !rec_anon_struct2 = !cir.record<struct {!cir.vector<2 x !cir.bool>, !cir.vector<2 x !cir.bool>}> +void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm256_2intersect_epi32 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.256" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s32i>, !cir.vector<8 x !s32i>) -> !rec_anon_struct + // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : 
!rec_anon_struct -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm256_2intersect_epi32 + // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0 + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1 + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm256_2intersect_epi32 + // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0 + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1 + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm256_2intersect_epi32(a, b, m0, m1); +} + +void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm256_2intersect_epi64 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.256" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s64i>, !cir.vector<4 x !s64i>) -> !rec_anon_struct1 + // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : 
!s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm256_2intersect_epi64 + // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 + // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm256_2intersect_epi64 + // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <4 x 
i1>, <4 x i1> } %{{.*}}, 0 + // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm256_2intersect_epi64(a, b, m0, m1); +} + +void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm_2intersect_epi32 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.128" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !rec_anon_struct1 + // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, 
#cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm_2intersect_epi32 + // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 + // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm_2intersect_epi32 + // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 + // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm_2intersect_epi32(a, b, m0, m1); 
+} + +void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm_2intersect_epi64 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.128" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct2 + // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> + // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> + // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm_2intersect_epi64 + // LLVM: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 + // LLVM: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // LLVM: %[[CAST1:.*]] = 
bitcast <8 x i1> %[[SHUF1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1 + // LLVM: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm_2intersect_epi64 + // OGCG: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 + // OGCG: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1 + _mm_2intersect_epi64(a, b, m0, m1); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c new file mode 100644 index 0000000000000..5856f88385e8d --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-llvm -o %t.ll -Wall 
-Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG + +#include <immintrin.h> + + +// CIR: !rec_anon_struct = !cir.record<struct {!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.bool>}> +// CIR: !rec_anon_struct1 = !cir.record<struct {!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.bool>}> +void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, __mmask16 *m1) { + // CIR-LABEL: mm512_2intersect_epi32 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.512" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>) -> !rec_anon_struct + // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<16 x !cir.bool> -> !u16i + // CIR: cir.store align(2) %[[CAST1]], %{{.*}} : !u16i, !cir.ptr<!u16i> + // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<16 x !cir.bool> -> !u16i + // CIR: cir.store align(2) %[[CAST2]], %{{.*}} : !u16i, !cir.ptr<!u16i> + + // LLVM-LABEL: test_mm512_2intersect_epi32 + // LLVM: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0 + // LLVM: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16 + // LLVM: store i16 %[[CAST1]], ptr %{{.*}}, align 2 + // LLVM: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> 
} %[[RES]], 1 + // LLVM: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16 + // LLVM: store i16 %[[CAST2]], ptr %{{.*}}, align 2 + + // OGCG-LABEL: test_mm512_2intersect_epi32 + // OGCG: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0 + // OGCG: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16 + // OGCG: store i16 %[[CAST1]], ptr %{{.*}}, align 2 + // OGCG: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 1 + // OGCG: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16 + // OGCG: store i16 %[[CAST2]], ptr %{{.*}}, align 2 + _mm512_2intersect_epi32(a, b, m0, m1); +} + +void test_mm512_2intersect_epi64(__m512i a, __m512i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm512_2intersect_epi64 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.512" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !rec_anon_struct1 + // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm512_2intersect_epi64 + // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0 + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } 
%[[RES]], 1 + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm512_2intersect_epi64 + // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0 + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 1 + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm512_2intersect_epi64(a, b, m0, m1); +} >From 38c261b6b5d131d1a439ee0432dd1d52d5b0c3d7 Mon Sep 17 00:00:00 2001 From: vishruth-thimmaiah <[email protected]> Date: Wed, 24 Dec 2025 00:14:38 +0530 Subject: [PATCH 2/4] use extract_member instead Signed-off-by: vishruth-thimmaiah <[email protected]> --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 52 ------------------- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 16 +++--- .../X86/avx512vlvp2intersect-builtins.c | 16 +++--- .../X86/avx512vp2intersect-builtins.c | 8 +-- 4 files changed, 19 insertions(+), 73 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 41e541d3ff3ef..50798be64551a 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2834,58 +2834,6 @@ def CIR_GetElementOp : CIR_Op<"get_element", [ }]; } -//===----------------------------------------------------------------------===// -// GetMemberValueOp -//===----------------------------------------------------------------------===// - -def CIR_GetMemberValueOp : CIR_Op<"get_member_value"> { - let summary = "Get the value of a member of a record"; - let description = [{ - The `cir.get_member_value` operation gets the value of a particular named - member from the 
input record. - - It expects the record object as well as the name of the member and its field - index. - - Example: - ```mlir - // Suppose we have a record with multiple members. - !s32i = !cir.int<s, 32> - !s8i = !cir.int<s, 8> - !ty_B = !cir.record<"struct.B" {!s32i, !s8i}> - - // Get the value of the member at index 1. - %1 = cir.get_member_value %0[1] : (!ty_B) -> !s8i - ``` - }]; - - let arguments = (ins - CIR_RecordType:$rec, - IndexAttr:$index_attr); - - let results = (outs CIR_AnyType:$result); - - let assemblyFormat = [{ - $rec `[` $index_attr `]` attr-dict - `:` qualified(type($rec)) `->` qualified(type($result)) - }]; - - let builders = [ - OpBuilder<(ins "mlir::Type":$result, - "mlir::Value":$rec, - "unsigned":$index), - [{ - mlir::APInt fieldIdx(64, index); - build($_builder, $_state, result, rec, fieldIdx); - }]> - ]; - - let extraClassDeclaration = [{ - /// Return the index of the record member being accessed. - uint64_t getIndex() { return getIndexAttr().getZExtValue(); } - }]; -} - //===----------------------------------------------------------------------===// // FuncOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 7d504b90da7ce..bd9f2ecc87dbb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1898,24 +1898,22 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { } auto resVector = cir::VectorType::get(builder.getBoolTy(), numElts); - llvm::SmallVector<mlir::Type, 2> resultTypes = {resVector, resVector}; cir::RecordType resRecord = - cir::RecordType::get(&getMLIRContext(), resultTypes, false, false, - cir::RecordType::RecordKind::Struct); + cir::RecordType::get(&getMLIRContext(), {resVector, resVector}, false, + false, cir::RecordType::RecordKind::Struct); - llvm::SmallVector<mlir::Value, 2> callOps = {ops[0], ops[1]}; - - 
mlir::Value call = emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), - intrinsicName, resRecord, callOps); + mlir::Value call = + emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), intrinsicName, + resRecord, mlir::ValueRange{ops[0], ops[1]}); mlir::Value result = - cir::GetMemberValueOp::create(builder, loc, resVector, call, 0); + cir::ExtractMemberOp::create(builder, loc, resVector, call, 0); result = emitX86MaskedCompareResult(builder, loc, result, numElts, nullptr); Address addr = Address( ops[2], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); builder.createStore(loc, result, addr); - result = cir::GetMemberValueOp::create(builder, loc, resVector, call, 1); + result = cir::ExtractMemberOp::create(builder, loc, resVector, call, 1); result = emitX86MaskedCompareResult(builder, loc, result, numElts, nullptr); addr = Address(ops[3], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c index a07da240727a6..163fee11eefe4 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c @@ -19,10 +19,10 @@ void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { // CIR-LABEL: mm256_2intersect_epi32 // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.256" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s32i>, !cir.vector<8 x !s32i>) -> !rec_anon_struct - // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct -> !cir.vector<8 x !cir.bool> + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> - // CIR: %[[VAL2:.*]] = cir.get_member_value 
%[[RES]][1] : !rec_anon_struct -> !cir.vector<8 x !cir.bool> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> !cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> @@ -49,12 +49,12 @@ void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { // CIR-LABEL: mm256_2intersect_epi64 // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.256" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s64i>, !cir.vector<4 x !s64i>) -> !rec_anon_struct1 - // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> - // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : 
!cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i @@ -87,12 +87,12 @@ void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { // CIR-LABEL: mm_2intersect_epi32 // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.128" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !rec_anon_struct1 - // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> - // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i @@ -125,12 +125,12 @@ void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 
*m1) void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { // CIR-LABEL: mm_2intersect_epi64 // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.128" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct2 - // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> - // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c index 5856f88385e8d..384477454c43e 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c +++ 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c @@ -19,10 +19,10 @@ void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, __mmask16 *m1) { // CIR-LABEL: mm512_2intersect_epi32 // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.512" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>) -> !rec_anon_struct - // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<16 x !cir.bool> -> !u16i // CIR: cir.store align(2) %[[CAST1]], %{{.*}} : !u16i, !cir.ptr<!u16i> - // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<16 x !cir.bool> -> !u16i // CIR: cir.store align(2) %[[CAST2]], %{{.*}} : !u16i, !cir.ptr<!u16i> @@ -49,10 +49,10 @@ void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, __mmask16 void test_mm512_2intersect_epi64(__m512i a, __m512i b, __mmask8 *m0, __mmask8 *m1) { // CIR-LABEL: mm512_2intersect_epi64 // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.512" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !rec_anon_struct1 - // CIR: %[[VAL1:.*]] = cir.get_member_value %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> - // CIR: %[[VAL2:.*]] = cir.get_member_value %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> 
+ // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> >From 961fc04cbdd3c0222ca48b319349f3f6901868b7 Mon Sep 17 00:00:00 2001 From: vishruth-thimmaiah <[email protected]> Date: Fri, 9 Jan 2026 23:16:33 +0530 Subject: [PATCH 3/4] use existing emitX86MaskedCompareResult Signed-off-by: vishruth-thimmaiah <[email protected]> --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 47 +++++----------------- 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index bd9f2ecc87dbb..dcb87c9e02ea5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -264,15 +264,15 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc, mlir::ValueRange{lhsVec, rhsVec}); } -// TODO: The cgf parameter should be removed when all the NYI cases are -// implemented. 
-static std::optional<mlir::Value> -emitX86MaskedCompareResult(CIRGenFunction &cgf, CIRGenBuilderTy &builder, - mlir::Value cmp, unsigned numElts, - mlir::Value maskIn, mlir::Location loc) { +static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder, + mlir::Value cmp, unsigned numElts, + mlir::Value maskIn, + mlir::Location loc) { if (maskIn) { - cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult"); - return {}; + auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(maskIn.getDefiningOp()); + if (!c || !c.isAllOnesValue()) + cmp = builder.createAnd(loc, cmp, + getMaskVecValue(builder, loc, maskIn, numElts)); } if (numElts < 8) { llvm::SmallVector<mlir::Attribute> indices; @@ -340,7 +340,7 @@ emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder, unsigned cc, if (ops.size() == 4) maskIn = ops[3]; - return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc); + return emitX86MaskedCompareResult(builder, cmp, numElts, maskIn, loc); } // TODO: The cgf parameter should be removed when all the NYI cases are @@ -542,31 +542,6 @@ static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, return builder.createVecCompare(loc, pred, op0, op1); } -static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder, - mlir::Location loc, - mlir::Value cmp, unsigned numElts, - mlir::Value maskIn) { - if (maskIn) { - auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(maskIn.getDefiningOp()); - if (!c || !c.isAllOnesValue()) - cmp = builder.createAnd(loc, cmp, - getMaskVecValue(builder, loc, maskIn, numElts)); - } - - if (numElts < 8) { - llvm::SmallVector<mlir::Attribute, 8> indices; - mlir::Type i32Ty = builder.getSInt32Ty(); - for (unsigned i = 0; i != numElts; ++i) - indices.push_back(cir::IntAttr::get(i32Ty, i)); - for (unsigned i = numElts; i != 8; ++i) - indices.push_back(cir::IntAttr::get(i32Ty, i % numElts + numElts)); - cmp = builder.createVecShuffle( - loc, cmp, builder.getNullValue(cmp.getType(), loc), indices); 
- } - - return builder.createBitcast(cmp, builder.getUIntNTy(std::max(numElts, 8U))); -} - std::optional<mlir::Value> CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { if (builtinID == Builtin::BI__builtin_cpu_is) { @@ -1908,13 +1883,13 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { resRecord, mlir::ValueRange{ops[0], ops[1]}); mlir::Value result = cir::ExtractMemberOp::create(builder, loc, resVector, call, 0); - result = emitX86MaskedCompareResult(builder, loc, result, numElts, nullptr); + result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc); Address addr = Address( ops[2], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); builder.createStore(loc, result, addr); result = cir::ExtractMemberOp::create(builder, loc, resVector, call, 1); - result = emitX86MaskedCompareResult(builder, loc, result, numElts, nullptr); + result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc); addr = Address(ops[3], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); builder.createStore(loc, result, addr); >From 72a56177e6293356f8702f84486b91789fac84b4 Mon Sep 17 00:00:00 2001 From: vishruth-thimmaiah <[email protected]> Date: Sat, 10 Jan 2026 00:01:37 +0530 Subject: [PATCH 4/4] fix failing tests Signed-off-by: vishruth-thimmaiah <[email protected]> --- .../X86/avx512vlvp2intersect-builtins.c | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c index 163fee11eefe4..db85d05ed9383 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c @@ -51,23 +51,23 @@ void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic 
"x86.avx512.vp2intersect.q.256" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s64i>, !cir.vector<4 x !s64i>) -> !rec_anon_struct1 // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> - // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> - // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> // LLVM-LABEL: 
test_mm256_2intersect_epi64 // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 - // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 - // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 @@ -89,23 +89,23 @@ void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.128" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !rec_anon_struct1 // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> - // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : 
!s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> - // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> // LLVM-LABEL: test_mm_2intersect_epi32 // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 - // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 - // LLVM: %[[SHUF2:.*]] = 
shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 @@ -127,29 +127,35 @@ void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.128" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct2 // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> - // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> - // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<8 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool> // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> // LLVM-LABEL: test_mm_2intersect_epi64 // LLVM: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // LLVM: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 - // LLVM: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // LLVM: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 2, i64 3, i64 2, i64 3> // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 // LLVM: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1 - // LLVM: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // LLVM: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 2, i64 3, i64 2, i64 3> // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 // OGCG-LABEL: test_mm_2intersect_epi64 - // OGCG: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - // OGCG: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 - // OGCG: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1 + // OGCG: %[[RES:.*]] = call { <2 x i1>, 
<2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 + // OGCG: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1 + // OGCG: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 _mm_2intersect_epi64(a, b, m0, m1); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
