https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/169157
>From 09aaf71c66fc7262554fe197cdd2dd2764fa5ee3 Mon Sep 17 00:00:00 2001 From: Jasmine Tang <[email protected]> Date: Wed, 19 Nov 2025 06:16:32 -0800 Subject: [PATCH 1/2] Gather implementation --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 109 +++++++++++++++++- clang/test/CIR/CodeGen/X86/avx512f-builtins.c | 96 +++++++++++++++ 2 files changed, 204 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 978fee7dbec9d..5eb20a2437ae5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -68,6 +68,27 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder, return bitCast; } +// Convert the mask from an integer type to a vector of i1. +static mlir::Value getMaskVecValue(CIRGenFunction &cgf, mlir::Value mask, + unsigned numElts, mlir::Location loc) { + cir::VectorType maskTy = + cir::VectorType::get(cgf.getBuilder().getSIntNTy(1), + cast<cir::IntType>(mask.getType()).getWidth()); + + mlir::Value maskVec = cgf.getBuilder().createBitcast(mask, maskTy); + + // If we have less than 8 elements, then the starting mask was an i8 and + // we need to extract down to the right number of elements. + if (numElts < 8) { + llvm::SmallVector<int64_t, 4> indices; + for (unsigned i = 0; i != numElts; ++i) + indices.push_back(i); + maskVec = cgf.getBuilder().createVecShuffle(loc, maskVec, maskVec, indices); + } + + return maskVec; +} + mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { if (builtinID == Builtin::BI__builtin_cpu_is) { @@ -456,7 +477,93 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_gathersiv8di: case X86::BI__builtin_ia32_gathersiv16si: case X86::BI__builtin_ia32_gatherdiv8di: - case X86::BI__builtin_ia32_gatherdiv16si: + case X86::BI__builtin_ia32_gatherdiv16si: { + StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + intrinsicName = "x86.avx512.mask.gather3div2.df"; + break; + case X86::BI__builtin_ia32_gather3div2di: + intrinsicName = "x86.avx512.mask.gather3div2.di"; + break; + case X86::BI__builtin_ia32_gather3div4df: + intrinsicName = "x86.avx512.mask.gather3div4.df"; + break; + case X86::BI__builtin_ia32_gather3div4di: + intrinsicName = "x86.avx512.mask.gather3div4.di"; + break; + case X86::BI__builtin_ia32_gather3div4sf: + intrinsicName = "x86.avx512.mask.gather3div4.sf"; + break; + case X86::BI__builtin_ia32_gather3div4si: + intrinsicName = "x86.avx512.mask.gather3div4.si"; + break; + case X86::BI__builtin_ia32_gather3div8sf: + intrinsicName = "x86.avx512.mask.gather3div8.sf"; + break; + case X86::BI__builtin_ia32_gather3div8si: + intrinsicName = "x86.avx512.mask.gather3div8.si"; + break; + case X86::BI__builtin_ia32_gather3siv2df: + intrinsicName = "x86.avx512.mask.gather3siv2.df"; + break; + case X86::BI__builtin_ia32_gather3siv2di: + intrinsicName = "x86.avx512.mask.gather3siv2.di"; + break; + case X86::BI__builtin_ia32_gather3siv4df: + intrinsicName = "x86.avx512.mask.gather3siv4.df"; + break; + case X86::BI__builtin_ia32_gather3siv4di: + intrinsicName = "x86.avx512.mask.gather3siv4.di"; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + intrinsicName = "x86.avx512.mask.gather3siv4.sf"; + break; + case X86::BI__builtin_ia32_gather3siv4si: + intrinsicName = "x86.avx512.mask.gather3siv4.si"; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + intrinsicName = "x86.avx512.mask.gather3siv8.sf"; + break; + case X86::BI__builtin_ia32_gather3siv8si: + intrinsicName = "x86.avx512.mask.gather3siv8.si"; + break; + case X86::BI__builtin_ia32_gathersiv8df: + intrinsicName = "x86.avx512.mask.gather.dpd.512"; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + intrinsicName = "x86.avx512.mask.gather.dps.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + intrinsicName = "x86.avx512.mask.gather.qpd.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + intrinsicName = "x86.avx512.mask.gather.qps.512"; + break; + case X86::BI__builtin_ia32_gathersiv8di: + intrinsicName = "x86.avx512.mask.gather.dpq.512"; + break; + case X86::BI__builtin_ia32_gathersiv16si: + intrinsicName = "x86.avx512.mask.gather.dpi.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + intrinsicName = "x86.avx512.mask.gather.qpq.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + intrinsicName = "x86.avx512.mask.gather.qpi.512"; + break; + } + + unsigned minElts = + std::min(cast<cir::VectorType>(ops[0].getType()).getSize(), + cast<cir::VectorType>(ops[2].getType()).getSize()); + ops[3] = + getMaskVecValue(*this, ops[3], minElts, getLoc(expr->getExprLoc())); + return emitIntrinsicCallOp(*this, expr, intrinsicName.str(), + convertType(expr->getType()), ops); + } case X86::BI__builtin_ia32_scattersiv8df: case X86::BI__builtin_ia32_scattersiv16sf: case X86::BI__builtin_ia32_scatterdiv8df: diff --git a/clang/test/CIR/CodeGen/X86/avx512f-builtins.c b/clang/test/CIR/CodeGen/X86/avx512f-builtins.c index dc54a87856a7c..e95e5f95e3513 100644 --- a/clang/test/CIR/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CIR/CodeGen/X86/avx512f-builtins.c @@ -77,3 +77,99 @@ __m512i test_mm512_undefined_epi32(void) { // OGCG: ret <8 x i64> zeroinitializer return _mm512_undefined_epi32(); } + +__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i64gather_ps + // CHECK: @llvm.x86.avx512.mask.gather.qps.512 + return _mm512_i64gather_ps(__index, __addr, 2); +} + +__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i64gather_ps + // CHECK: @llvm.x86.avx512.mask.gather.qps.512 + return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i64gather_epi32 + // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 + return _mm512_i64gather_epi32(__index, __addr, 2); +} + +__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i64gather_epi32 + // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 + return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i64gather_pd + // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 + return _mm512_i64gather_pd(__index, __addr, 2); +} + +__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i64gather_pd + // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 + return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i64gather_epi64 + // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 + return _mm512_i64gather_epi64(__index, __addr, 2); +} + +__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i64gather_epi64 + // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 + return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i32gather_ps + // CHECK: @llvm.x86.avx512.mask.gather.dps.512 + return _mm512_i32gather_ps(__index, __addr, 2); +} + +__m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i32gather_ps + // CHECK: @llvm.x86.avx512.mask.gather.dps.512 + return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2); +} + +__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i32gather_epi32 + // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 + return _mm512_i32gather_epi32(__index, __addr, 2); +} + +__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i32gather_epi32 + // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 + return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i32gather_pd + // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 + return _mm512_i32gather_pd(__index, __addr, 2); +} + +__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i32gather_pd + // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 + return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_i32gather_epi64 + // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 + return _mm512_i32gather_epi64(__index, __addr, 2); +} + +__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CHECK-LABEL: test_mm512_mask_i32gather_epi64 + // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 + return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); +} >From 5972857f71bc6a0016022a877ef6e680fce4d745 Mon Sep 17 00:00:00 2001 From: Jasmine Tang <[email protected]> Date: Sat, 22 Nov 2025 01:56:09 -0800 Subject: [PATCH 2/2] Add clang/test/CIR/CodeGen/X86/avx512vl-builtins.c --- .../test/CIR/CodeGen/X86/avx512vl-builtins.c | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 clang/test/CIR/CodeGen/X86/avx512vl-builtins.c diff --git a/clang/test/CIR/CodeGen/X86/avx512vl-builtins.c b/clang/test/CIR/CodeGen/X86/avx512vl-builtins.c new file mode 100644 index 0000000000000..08b40ad033b24 --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/avx512vl-builtins.c @@ -0,0 +1,151 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + + +#include <immintrin.h> + +__m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_pd + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div2.df" + + // LLVM-LABEL: @test_mm_mmask_i64gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3div2.df + return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_epi64 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div2.di" + + // LLVM-LABEL: @test_mm_mmask_i64gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3div2.di + return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_pd + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.df" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3div4.df + return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_epi64 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.di" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3div4.di + return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_ps + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.sf" + + // LLVM-LABEL: @test_mm_mmask_i64gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3div4.sf + return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_epi32 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.si" + + // LLVM-LABEL: @test_mm_mmask_i64gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3div4.si + return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_ps + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div8.sf" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3div8.sf + return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_epi32 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div8.si" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3div8.si + return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_pd + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv2.df" + + // LLVM-LABEL: @test_mm_mask_i32gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3siv2.df + return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_epi64 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv2.di" + + // LLVM-LABEL: @test_mm_mask_i32gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3siv2.di + return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_pd + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.df" + + // LLVM-LABEL: @test_mm256_mask_i32gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3siv4.df + return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_epi64 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.di" + + // LLVM-LABEL: @test_mm256_mask_i32gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3siv4.di + return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_ps + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.sf" + + // LLVM-LABEL: @test_mm_mask_i32gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3siv4.sf + return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_epi32 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.si" + + // LLVM-LABEL: @test_mm_mask_i32gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3siv4.si + return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_ps + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv8.sf" + + // LLVM-LABEL: @test_mm256_mask_i32gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3siv8.sf + return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_epi32 + // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv8.si" + + // LLVM-LABEL: @test_mm256_mask_i32gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3siv8.si + return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
