https://github.com/cs25resch11005-bhuvan updated https://github.com/llvm/llvm-project/pull/169582
>From 6bc66e5d10106aa3d13d9dfcc9f85c3f2259889d Mon Sep 17 00:00:00 2001 From: bhuvan1527 <[email protected]> Date: Wed, 26 Nov 2025 05:11:22 +0530 Subject: [PATCH 1/2] [CIR][CIRGen][Builtin][X86] Masked compress Intrinsics Added masked compress builtin in CIR. Note: This is my first PR to llvm. --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index a0ee57f82a04f..fe595890b60f7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -84,6 +84,14 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc, } return maskVec; } +static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf, const CallExpr *expr,ArrayRef<mlir::Value> ops, bool IsCompress, const std::string &ID){ + auto ResultTy = cast<cir::VectorType>(ops[1].getType()); + mlir::Value MaskValue = getMaskVecValue(cgf, expr, ops[2], cast<cir::VectorType>(ResultTy).getSize()); + llvm::SmallVector<mlir::Value, 4> op{ops[0], ops[1], MaskValue}; + + return emitIntrinsicCallOp(cgf,expr, ID, ResultTy, op); + +} mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { @@ -456,7 +464,9 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_compresshi512_mask: case X86::BI__builtin_ia32_compressqi128_mask: case X86::BI__builtin_ia32_compressqi256_mask: - case X86::BI__builtin_ia32_compressqi512_mask: + case X86::BI__builtin_ia32_compressqi512_mask:{ + return emitX86CompressExpand(*this, expr, ops, true, "x86_avx512_mask_compress"); + } case X86::BI__builtin_ia32_gather3div2df: case X86::BI__builtin_ia32_gather3div2di: case X86::BI__builtin_ia32_gather3div4df: >From 1abcf4586e444a944e0ca30e2658c04bc7d6a24c Mon Sep 17 00:00:00 2001 From: bhuvan1527 <[email protected]> Date: Thu, 27 Nov 2025 19:59:41 +0530 
Subject: [PATCH 2/2] [CIR][CIRGen][Builtin][X86] Masked compress Intrinsics This PR is related to issue #167765. Added support for the masked compress builtins in CIR CodeGen. --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 36 ++-- .../CodeGenBuiltins/X86/avx512vl-builtins.c | 158 ++++++++++++++++++ .../X86/avx512vlvbmi2-builtins.c | 53 ++++++ 3 files changed, 227 insertions(+), 20 deletions(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index fe595890b60f7..7da6e3b09661c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -27,7 +27,7 @@ static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder, Operands &&...op) { return cir::LLVMIntrinsicCallOp::create(builder, loc, builder.getStringAttr(str), resTy, - std::forward<Operands>(op)...) 
+ {std::forward<Operands>(op)...}) .getResult(); } @@ -84,13 +84,11 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc, } return maskVec; } -static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf, const CallExpr *expr,ArrayRef<mlir::Value> ops, bool IsCompress, const std::string &ID){ - auto ResultTy = cast<cir::VectorType>(ops[1].getType()); - mlir::Value MaskValue = getMaskVecValue(cgf, expr, ops[2], cast<cir::VectorType>(ResultTy).getSize()); - llvm::SmallVector<mlir::Value, 4> op{ops[0], ops[1], MaskValue}; - - return emitIntrinsicCallOp(cgf,expr, ID, ResultTy, op); - +static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value source, mlir::Value mask, mlir::Value inputVector, const std::string &id){ + auto ResultTy = cast<cir::VectorType>(mask.getType()); + mlir::Value MaskValue = getMaskVecValue(builder, loc, inputVector, cast<cir::VectorType>(ResultTy).getSize()); + // SmallVector<mlir::Value,3> op{source, mask, MaskValue}; + return emitIntrinsicCallOp(builder, loc, id, ResultTy, source, mask, MaskValue); } mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, @@ -429,6 +427,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_compressstoreqi128_mask: case X86::BI__builtin_ia32_compressstoreqi256_mask: case X86::BI__builtin_ia32_compressstoreqi512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_expanddf128_mask: case X86::BI__builtin_ia32_expanddf256_mask: case X86::BI__builtin_ia32_expanddf512_mask: @@ -446,7 +448,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_expandhi512_mask: case X86::BI__builtin_ia32_expandqi128_mask: case X86::BI__builtin_ia32_expandqi256_mask: - case X86::BI__builtin_ia32_expandqi512_mask: + case 
X86::BI__builtin_ia32_expandqi512_mask:{ + mlir::Location loc = getLoc(expr->getExprLoc()); + return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], "x86_avx512_mask_expand"); + } case X86::BI__builtin_ia32_compressdf128_mask: case X86::BI__builtin_ia32_compressdf256_mask: case X86::BI__builtin_ia32_compressdf512_mask: @@ -465,7 +470,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_compressqi128_mask: case X86::BI__builtin_ia32_compressqi256_mask: case X86::BI__builtin_ia32_compressqi512_mask:{ - return emitX86CompressExpand(*this, expr, ops, true, "x86_avx512_mask_compress"); + mlir::Location loc = getLoc(expr->getExprLoc()); + return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], "x86_avx512_mask_compress"); } case X86::BI__builtin_ia32_gather3div2df: case X86::BI__builtin_ia32_gather3div2di: @@ -791,16 +797,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: - case X86::BI__builtin_ia32_sqrtpd256: - case X86::BI__builtin_ia32_sqrtpd: - case X86::BI__builtin_ia32_sqrtps256: - case X86::BI__builtin_ia32_sqrtps: - case X86::BI__builtin_ia32_sqrtph256: - case X86::BI__builtin_ia32_sqrtph: case X86::BI__builtin_ia32_sqrtph512: - case X86::BI__builtin_ia32_vsqrtbf16256: - case X86::BI__builtin_ia32_vsqrtbf16: - case X86::BI__builtin_ia32_vsqrtbf16512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: case X86::BI__builtin_ia32_pmuludq128: @@ -953,7 +950,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vcvtph2ps256_mask: case X86::BI__builtin_ia32_vcvtph2ps512_mask: case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: - case X86::BI__builtin_ia32_cvtsbf162ss_32: case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: case 
X86::BI__cpuid: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c new file mode 100644 index 0000000000000..6a3076525eeef --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c @@ -0,0 +1,158 @@ +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +#include <immintrin.h> + + +__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) { + + return _mm_mask_expand_pd(__W,__U,__A); +} +__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) { + + return _mm_maskz_expand_pd(__U,__A); +} +__m256d test_mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A) { + + return _mm256_mask_expand_pd(__W,__U,__A); +} +__m256d test_mm256_maskz_expand_pd(__mmask8 __U, __m256d __A) { + + return _mm256_maskz_expand_pd(__U,__A); +} +__m128i test_mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + + return _mm_mask_expand_epi64(__W,__U,__A); +} +__m128i test_mm_maskz_expand_epi64(__mmask8 __U, 
__m128i __A) { + + return _mm_maskz_expand_epi64(__U,__A); +} +__m256i test_mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + + return _mm256_mask_expand_epi64(__W,__U,__A); +} +__m256i test_mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A) { + + return _mm256_maskz_expand_epi64(__U,__A); +} + +__m128 test_mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A) { + + return _mm_mask_expand_ps(__W,__U,__A); +} +__m128 test_mm_maskz_expand_ps(__mmask8 __U, __m128 __A) { + + return _mm_maskz_expand_ps(__U,__A); +} +__m256 test_mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A) { + + return _mm256_mask_expand_ps(__W,__U,__A); +} +__m256 test_mm256_maskz_expand_ps(__mmask8 __U, __m256 __A) { + + return _mm256_maskz_expand_ps(__U,__A); +} +__m128i test_mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + + return _mm_mask_expand_epi32(__W,__U,__A); +} +__m128i test_mm_maskz_expand_epi32(__mmask8 __U, __m128i __A) { + + return _mm_maskz_expand_epi32(__U,__A); +} +__m256i test_mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + + return _mm256_mask_expand_epi32(__W,__U,__A); +} +__m256i test_mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A) { + + return _mm256_maskz_expand_epi32(__U,__A); +} + +__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) { + + return _mm_mask_compress_pd(__W,__U,__A); +} + +__m128d test_mm_maskz_compress_pd(__mmask8 __U, __m128d __A) { + + return _mm_maskz_compress_pd(__U,__A); +} + +__m256d test_mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A) { + + return _mm256_mask_compress_pd(__W,__U,__A); +} + +__m256d test_mm256_maskz_compress_pd(__mmask8 __U, __m256d __A) { + + return _mm256_maskz_compress_pd(__U,__A); +} + +__m128i test_mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + + return _mm_mask_compress_epi64(__W,__U,__A); +} + +__m128i test_mm_maskz_compress_epi64(__mmask8 __U, __m128i __A) { + + return _mm_maskz_compress_epi64(__U,__A); 
+} + +__m256i test_mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + + return _mm256_mask_compress_epi64(__W,__U,__A); +} + +__m256i test_mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A) { + + return _mm256_maskz_compress_epi64(__U,__A); +} + +__m128 test_mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A) { + + return _mm_mask_compress_ps(__W,__U,__A); +} + +__m128 test_mm_maskz_compress_ps(__mmask8 __U, __m128 __A) { + + return _mm_maskz_compress_ps(__U,__A); +} + +__m256 test_mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A) { + + return _mm256_mask_compress_ps(__W,__U,__A); +} + +__m256 test_mm256_maskz_compress_ps(__mmask8 __U, __m256 __A) { + + return _mm256_maskz_compress_ps(__U,__A); +} + +__m128i test_mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + + return _mm_mask_compress_epi32(__W,__U,__A); +} + +__m128i test_mm_maskz_compress_epi32(__mmask8 __U, __m128i __A) { + + return _mm_maskz_compress_epi32(__U,__A); +} + +__m256i test_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + + return _mm256_mask_compress_epi32(__W,__U,__A); +} + +__m256i test_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) { + + return _mm256_maskz_compress_epi32(__U,__A); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c new file mode 100644 index 0000000000000..5a7051bdf5692 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c @@ -0,0 +1,53 @@ + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror 
-Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +#include <immintrin.h> + + +__m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { + + return _mm_mask_compress_epi16(__S, __U, __D); +} + +__m128i test_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) { + + return _mm_maskz_compress_epi16(__U, __D); +} + +__m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { + + return _mm_mask_compress_epi8(__S, __U, __D); +} + +__m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { + + return _mm_maskz_compress_epi8(__U, __D); +} + +__m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { + + return _mm_mask_expand_epi16(__S, __U, __D); +} + +__m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) { + + return _mm_maskz_expand_epi16(__U, __D); +} + +__m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { + + return _mm_mask_expand_epi8(__S, __U, __D); +} + +__m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) { + + return _mm_maskz_expand_epi8(__U, __D); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
