https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/154464
>From 6861a2b2b142e9550ec9d0642f93471652a73901 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Tue, 19 Aug 2025 23:01:03 -0500 Subject: [PATCH] [Clang] Add builtins for masked vector loads / stores Summary: Clang has support for boolean vectors, these builtins expose the LLVM instruction of the same name. This differs from a manual load and select by potentially suppressing traps from deactivated lanes. Fixes: https://github.com/llvm/llvm-project/issues/107753 Fix builtin attributes Cleanup --- clang/docs/LanguageExtensions.rst | 18 ++++ clang/docs/ReleaseNotes.rst | 3 + clang/include/clang/Basic/Builtins.td | 12 +++ .../clang/Basic/DiagnosticSemaKinds.td | 9 +- clang/lib/CodeGen/CGBuiltin.cpp | 38 +++++++++ clang/lib/Sema/SemaChecking.cpp | 83 +++++++++++++++++++ clang/test/CodeGen/builtin-masked.c | 53 ++++++++++++ clang/test/Sema/builtin-masked.c | 25 ++++++ 8 files changed, 239 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/builtin-masked.c create mode 100644 clang/test/Sema/builtin-masked.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 12ca4cf42f7cc..df256c7f8c063 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -941,6 +941,24 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``. for the comparison. ======================================= ====================================================================== ================================== +*Masked Builtins* + +Each builtin accesses memory according to a provided boolean mask. These are +provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first +argument is always a boolean mask vector. + +Example: + +.. 
code-block:: c++ + + using v8b = bool [[clang::ext_vector_type(8)]]; + using v8i = int [[clang::ext_vector_type(8)]]; + + v8i load(v8b m, v8i *p) { return __builtin_masked_load(m, p); } + + void store(v8b m, v8i v, v8i *p) { __builtin_masked_store(m, v, p); } + + Matrix Types ============ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e88d68fa99664..6f92ce8d1ba44 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -140,6 +140,9 @@ Non-comprehensive list of changes in this release - A vector of booleans is now a valid condition for the ternary ``?:`` operator. This binds to a simple vector select operation. +- Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for conditional + memory loads and stores. These bind to the LLVM intrinsics of the same name. + - Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and ``ptrauth_intrinsics`` features has been deprecated, and is restricted to the arm64e target only. The correct method to check for these features is to test for the ``__PTRAUTH__`` diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index ad340e2ed0eec..56f380ceba4ce 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1232,6 +1232,18 @@ def ConvertVector : Builtin { let Prototype = "void(...)"; } +def MaskedLoad : Builtin { + let Spellings = ["__builtin_masked_load"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def MaskedStore : Builtin { + let Spellings = ["__builtin_masked_store"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def AllocaUninitialized : Builtin { let Spellings = ["__builtin_alloca_uninitialized"]; let Attributes = [FunctionWithBuiltinPrefix, NoThrow]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c733e8823cea6..3b34b7174b65f 100644 
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10994,10 +10994,15 @@ def err_block_on_vm : Error< def err_sizeless_nonlocal : Error< "non-local variable with sizeless type %0">; +def err_vec_masked_load_store_ptr : Error< + "%ordinal0 argument must be a %1">; +def err_vec_masked_load_store_size : Error< + "all arguments to %0 must have the same number of elements (was %1 and %2)">; + def err_vec_builtin_non_vector : Error< "%select{first two|all}1 arguments to %0 must be vectors">; def err_vec_builtin_incompatible_vector : Error< - "%select{first two|all}1 arguments to %0 must have the same type">; + "%select{first two|all|last two}1 arguments to %0 must have the same type">; def err_vsx_builtin_nonconstant_argument : Error< "argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">; @@ -12859,7 +12864,7 @@ def err_builtin_invalid_arg_type: Error< "%plural{0:|: }1" // Second component: integer-like types "%select{|integer|signed integer|unsigned integer|'int'|" - "pointer to a valid matrix element}2" + "pointer to a valid matrix element|boolean}2" // A space after a non-empty second component "%plural{0:|: }2" // An 'or' if non-empty second and third components are combined diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 59414fe466704..d9cc37d123fb4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4255,6 +4255,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_masked_load: { + llvm::Value *Mask = EmitScalarExpr(E->getArg(0)); + llvm::Value *Ptr = EmitScalarExpr(E->getArg(1)); + + llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType()); + CharUnits Align = CGM.getNaturalTypeAlignment(E->getType(), nullptr); + llvm::Value *AlignVal = + llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); + + llvm::Value *PassThru = 
llvm::PoisonValue::get(RetTy); + + Function *F = + CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, UnqualPtrTy}); + + llvm::Value *Result = + Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, "masked_load"); + return RValue::get(Result); + }; + case Builtin::BI__builtin_masked_store: { + llvm::Value *Mask = EmitScalarExpr(E->getArg(0)); + llvm::Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::Value *Ptr = EmitScalarExpr(E->getArg(2)); + + QualType ValTy = E->getArg(1)->getType(); + llvm::Type *ValLLTy = CGM.getTypes().ConvertType(ValTy); + llvm::Type *PtrTy = Ptr->getType(); + + CharUnits Align = CGM.getNaturalTypeAlignment(ValTy, nullptr); + llvm::Value *AlignVal = + llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); + + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::masked_store, {ValLLTy, PtrTy}); + + Builder.CreateCall(F, {Val, Ptr, AlignVal, Mask}); + return RValue::get(nullptr); + } + case Builtin::BI__builtin_isinf_sign: { // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index c21c40e707008..2b778211db6e5 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2266,6 +2266,85 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) { return false; } +static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg, + unsigned Pos) { + QualType MaskTy = MaskArg->getType(); + if (!MaskTy->isExtVectorBoolType()) + return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* vector of */ 4 << /* booleans */ 6 << /* no fp */ 0 + << MaskTy; + + QualType PtrTy = PtrArg->getType(); + if (!PtrTy->isPointerType() || !PtrTy->getPointeeType()->isVectorType()) + return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr) + << Pos << "pointer to vector"; + return false; +} + +static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) { + if (S.checkArgCount(TheCall, 2)) + return ExprError(); + + Expr *MaskArg = TheCall->getArg(0); + Expr *PtrArg = TheCall->getArg(1); + if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 2)) + return ExprError(); + + QualType MaskTy = MaskArg->getType(); + QualType PtrTy = PtrArg->getType(); + QualType PointeeTy = PtrTy->getPointeeType(); + const VectorType *MaskVecTy = MaskTy->getAs<VectorType>(); + const VectorType *DataVecTy = PointeeTy->getAs<VectorType>(); + if (MaskVecTy->getNumElements() != DataVecTy->getNumElements()) + return ExprError( + S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) + << "__builtin_masked_load" << MaskTy << PointeeTy); + + TheCall->setType(PointeeTy); + return TheCall; +} + +static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) { + if (S.checkArgCount(TheCall, 3)) + return ExprError(); + + Expr *MaskArg = TheCall->getArg(0); + Expr *ValArg = TheCall->getArg(1); + Expr *PtrArg = TheCall->getArg(2); + + if 
(CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3)) + return ExprError(); + + QualType MaskTy = MaskArg->getType(); + QualType PtrTy = PtrArg->getType(); + QualType ValTy = ValArg->getType(); + if (!ValTy->isVectorType()) + return ExprError( + S.Diag(ValArg->getExprLoc(), diag::err_vec_masked_load_store_ptr) + << 2 << "vector"); + + QualType PointeeTy = PtrTy->getPointeeType(); + const VectorType *MaskVecTy = MaskTy->getAs<VectorType>(); + const VectorType *ValVecTy = ValTy->getAs<VectorType>(); + const VectorType *PtrVecTy = PointeeTy->getAs<VectorType>(); + + if (MaskVecTy->getNumElements() != ValVecTy->getNumElements() || + MaskVecTy->getNumElements() != PtrVecTy->getNumElements()) + return ExprError( + S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) + << "__builtin_masked_store" << MaskTy << PointeeTy); + + if (!S.Context.hasSameType(ValTy, PointeeTy)) + return ExprError(S.Diag(TheCall->getBeginLoc(), + diag::err_vec_builtin_incompatible_vector) + << TheCall->getDirectCallee() << /*isMorethantwoArgs*/ 2 + << SourceRange(TheCall->getArg(1)->getBeginLoc(), + TheCall->getArg(1)->getEndLoc())); + + TheCall->setType(S.Context.VoidTy); + return TheCall; +} + static ExprResult BuiltinInvoke(Sema &S, CallExpr *TheCall) { SourceLocation Loc = TheCall->getBeginLoc(); MutableArrayRef Args(TheCall->getArgs(), TheCall->getNumArgs()); @@ -2518,6 +2597,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return BuiltinShuffleVector(TheCall); // TheCall will be freed by the smart pointer here, but that's fine, since // BuiltinShuffleVector guts it, but then doesn't release it. 
+ case Builtin::BI__builtin_masked_load: + return BuiltinMaskedLoad(*this, TheCall); + case Builtin::BI__builtin_masked_store: + return BuiltinMaskedStore(*this, TheCall); case Builtin::BI__builtin_invoke: return BuiltinInvoke(*this, TheCall); case Builtin::BI__builtin_prefetch: diff --git a/clang/test/CodeGen/builtin-masked.c b/clang/test/CodeGen/builtin-masked.c new file mode 100644 index 0000000000000..67071ba19bd25 --- /dev/null +++ b/clang/test/CodeGen/builtin-masked.c @@ -0,0 +1,53 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s + +typedef int v8i __attribute__((ext_vector_type(8))); +typedef _Bool v8b __attribute__((ext_vector_type(8))); + +// CHECK-LABEL: define dso_local <8 x i32> @test_load( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP0]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 32, <8 x i1> [[TMP1]], <8 x i32> poison) +// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]] +// +v8i test_load(v8b m, v8i *p) { + return __builtin_masked_load(m, p); +} + +// CHECK-LABEL: define dso_local void 
@test_store( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[V]], ptr [[V_ADDR]], align 32 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr [[TMP4]], i32 32, <8 x i1> [[TMP2]]) +// CHECK-NEXT: ret void +// +void test_store(v8b m, v8i v, v8i *p) { + __builtin_masked_store(m, v, p); +} diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c new file mode 100644 index 0000000000000..81f5323bbe260 --- /dev/null +++ b/clang/test/Sema/builtin-masked.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s + +typedef int v8i __attribute__((ext_vector_type(8))); +typedef _Bool v8b __attribute__((ext_vector_type(8))); +typedef _Bool v2b __attribute__((ext_vector_type(2))); +typedef float v8f __attribute__((ext_vector_type(8))); + +void test_masked_load(v8i *pf, v8b mask, v2b mask2) { + (void)__builtin_masked_load(mask); // 
expected-error {{too few arguments to function call, expected 2, have 1}} + (void)__builtin_masked_load(mask, pf, pf); // expected-error {{too many arguments to function call, expected 2, have 3}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} + (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} + (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} +} + +void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { + __builtin_masked_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}} + __builtin_masked_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}} + __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}} + __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}} + __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}} + __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to __builtin_masked_store must have the same number of elements}} + __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_store' must have the same type}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits