https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/157895
>From 105d21ef27d1993527924e1da5f181dc9a67eff1 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Wed, 10 Sep 2025 11:41:25 -0500
Subject: [PATCH 1/3] [Clang] Add vector gather / scatter builtins to clang

Summary:
This patch exposes `__builtin_masked_gather` and `__builtin_masked_scatter`
to clang. These map to the underlying intrinsics relatively cleanly, needing
only a level of indirection to convert a vector of indices and a base pointer
into a vector of pointers.
---
 clang/docs/LanguageExtensions.rst     |  13 ++++
 clang/docs/ReleaseNotes.rst           |   4 +
 clang/include/clang/Basic/Builtins.td |  12 +++
 clang/lib/CodeGen/CGBuiltin.cpp       |  41 ++++++++++
 clang/lib/Sema/SemaChecking.cpp       | 107 +++++++++++++++++++++++++-
 clang/test/CodeGen/builtin-masked.c   |  58 ++++++++++++++
 clang/test/Sema/builtin-masked.c      |  20 +++++
 7 files changed, 252 insertions(+), 3 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index ad190eace5b05..f3ce5ee534609 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -957,6 +957,11 @@ builtins have the same interface but store the result in consecutive indices.
 Effectively this performs the ``if (mask[i]) val[i] = ptr[j++]`` and
 ``if (mask[i]) ptr[j++] = val[i]`` pattern respectively.
 
+The ``__builtin_masked_gather`` and ``__builtin_masked_scatter`` builtins handle
+non-sequential memory access for vector types. These use a base pointer and a
+vector of integer indices to gather memory into a vector type or scatter it to
+separate indices.
+
 Example:
 
 .. code-block:: c++
@@ -978,6 +983,14 @@ Example:
     __builtin_masked_compress_store(mask, val, ptr);
   }
 
+  v8i gather(v8b mask, v8i idx, int *ptr) {
+    return __builtin_masked_gather(mask, idx, ptr);
+  }
+
+  void scatter(v8b mask, v8i val, v8i idx, int *ptr) {
+    __builtin_masked_scatter(mask, idx, val, ptr);
+  }
+
 Matrix Types
 ============
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e1e497ccdbccd..4a68688540ddb 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -183,6 +183,10 @@ Non-comprehensive list of changes in this release
   conditional memory loads from vectors. Binds to the LLVM intrinsics of the
   same name.
 
+- Added ``__builtin_masked_gather`` and ``__builtin_masked_scatter`` for
+  conditional gathering and scattering operations on vectors. Binds to the LLVM
+  intrinsics of the same name.
+
 - The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg``
   functions now accept fixed-size boolean vectors.
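For reference, the semantics documented above correspond to this scalar model
(a plain C sketch, not part of the patch; the helper names and the fixed
8-lane int shape are illustrative):

  /* Scalar model of __builtin_masked_gather / __builtin_masked_scatter:
     masked-off gather lanes produce the pass-through value (poison when
     the optional fourth argument is omitted). */
  void gather_scalar(const _Bool mask[8], const int idx[8], const int *base,
                     const int thru[8], int out[8]) {
    for (int i = 0; i < 8; ++i)
      out[i] = mask[i] ? base[idx[i]] : thru[i];
  }

  void scatter_scalar(const _Bool mask[8], const int idx[8], const int val[8],
                      int *base) {
    for (int i = 0; i < 8; ++i)
      if (mask[i])
        base[idx[i]] = val[i];
  }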
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 27639f06529cb..97be087aa752a 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1256,6 +1256,18 @@ def MaskedCompressStore : Builtin {
   let Prototype = "void(...)";
 }
 
+def MaskedGather : Builtin {
+  let Spellings = ["__builtin_masked_gather"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def MaskedScatter : Builtin {
+  let Spellings = ["__builtin_masked_scatter"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def AllocaUninitialized : Builtin {
   let Spellings = ["__builtin_alloca_uninitialized"];
   let Attributes = [FunctionWithBuiltinPrefix, NoThrow];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 172a521e63c17..ef50ba8328bfd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4298,6 +4298,30 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     }
     return RValue::get(Result);
   };
+  case Builtin::BI__builtin_masked_gather: {
+    llvm::Value *Mask = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Idx = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Ptr = EmitScalarExpr(E->getArg(2));
+
+    llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType());
+    llvm::Type *ElemTy = CGM.getTypes().ConvertType(
+        E->getType()->getAs<VectorType>()->getElementType());
+    llvm::Value *AlignVal = llvm::ConstantInt::get(Int32Ty, 1);
+
+    llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
+    if (E->getNumArgs() > 3)
+      PassThru = EmitScalarExpr(E->getArg(3));
+
+    llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx);
+
+    llvm::Value *Result;
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::masked_gather, {RetTy, PtrVec->getType()});
+
+    Result = Builder.CreateCall(F, {PtrVec, AlignVal, Mask, PassThru},
+                                "masked_gather");
+    return RValue::get(Result);
+  }
   case Builtin::BI__builtin_masked_store:
   case Builtin::BI__builtin_masked_compress_store: {
     llvm::Value *Mask = EmitScalarExpr(E->getArg(0));
@@ -4323,7 +4347,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     }
     return RValue::get(nullptr);
   }
+  case Builtin::BI__builtin_masked_scatter: {
+    llvm::Value *Mask = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Idx = EmitScalarExpr(E->getArg(2));
+    llvm::Value *Ptr = EmitScalarExpr(E->getArg(3));
+    llvm::Type *ElemTy = CGM.getTypes().ConvertType(
+        E->getArg(1)->getType()->getAs<VectorType>()->getElementType());
+    llvm::Value *AlignVal = llvm::ConstantInt::get(Int32Ty, 1);
+
+    llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx);
+
+    Function *F = CGM.getIntrinsic(Intrinsic::masked_scatter,
+                                   {Val->getType(), PtrVec->getType()});
+
+    Builder.CreateCall(F, {Val, PtrVec, AlignVal, Mask});
+    return RValue();
+  }
   case Builtin::BI__builtin_isinf_sign: {
     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
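The gather case above substitutes poison for masked-off lanes unless a fourth
argument is given. A usage sketch of that optional pass-through (assumes a
clang built with this patch; the typedefs match the ones used in the tests
below):

  typedef int v8i __attribute__((ext_vector_type(8)));
  typedef _Bool v8b __attribute__((ext_vector_type(8)));

  /* Masked-off lanes take the corresponding lane of 'fallback' instead of
     poison, via the E->getNumArgs() > 3 path in the codegen above. */
  v8i gather_or_default(v8b mask, v8i idx, int *ptr, v8i fallback) {
    return __builtin_masked_gather(mask, idx, ptr, fallback);
  }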
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 077f4311ed729..6634f38182e41 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2270,7 +2270,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
 }
 
 static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
-                                   unsigned Pos) {
+                                   unsigned Pos, bool Vector = true) {
   QualType MaskTy = MaskArg->getType();
   if (!MaskTy->isExtVectorBoolType())
     return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
@@ -2278,9 +2278,11 @@ static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
            << MaskTy;
 
   QualType PtrTy = PtrArg->getType();
-  if (!PtrTy->isPointerType() || !PtrTy->getPointeeType()->isVectorType())
+  if (!PtrTy->isPointerType() ||
+      (Vector && !PtrTy->getPointeeType()->isVectorType()) ||
+      (!Vector && PtrTy->getPointeeType()->isVectorType()))
     return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
-           << Pos << "pointer to vector";
+           << Pos << (Vector ? "pointer to vector" : "scalar pointer");
 
   return false;
 }
@@ -2361,6 +2363,101 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) {
   return TheCall;
 }
 
+static ExprResult BuiltinMaskedGather(Sema &S, CallExpr *TheCall) {
+  if (S.checkArgCountRange(TheCall, 3, 4))
+    return ExprError();
+
+  Expr *MaskArg = TheCall->getArg(0);
+  Expr *IdxArg = TheCall->getArg(1);
+  Expr *PtrArg = TheCall->getArg(2);
+  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
+    return ExprError();
+
+  QualType IdxTy = IdxArg->getType();
+  const VectorType *IdxVecTy = IdxTy->getAs<VectorType>();
+  if (!IdxTy->isExtVectorType() || !IdxVecTy->getElementType()->isIntegerType())
+    return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+           << 1 << /* vector of */ 4 << /* integer */ 1 << /* no fp */ 0
+           << IdxTy;
+
+  QualType MaskTy = MaskArg->getType();
+  QualType PtrTy = PtrArg->getType();
+  QualType PointeeTy = PtrTy->getPointeeType();
+  const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
+  if (MaskVecTy->getNumElements() != IdxVecTy->getNumElements())
+    return ExprError(
+        S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
+        << S.getASTContext().BuiltinInfo.getQuotedName(
+               TheCall->getBuiltinCallee())
+        << MaskTy << IdxTy);
+
+  QualType RetTy =
+      S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());
+  if (TheCall->getNumArgs() == 4) {
+    Expr *PassThruArg = TheCall->getArg(3);
+    QualType PassThruTy = PassThruArg->getType();
+    if (!S.Context.hasSameType(PassThruTy, RetTy))
+      return S.Diag(PassThruArg->getExprLoc(),
+                    diag::err_vec_masked_load_store_ptr)
+             << /* fourth argument */ 4 << RetTy;
+  }
+
+  TheCall->setType(RetTy);
+  return TheCall;
+}
+
+static ExprResult BuiltinMaskedScatter(Sema &S, CallExpr *TheCall) {
+  if (S.checkArgCount(TheCall, 4))
+    return ExprError();
+
+  Expr *MaskArg = TheCall->getArg(0);
+  Expr *IdxArg = TheCall->getArg(1);
+  Expr *ValArg = TheCall->getArg(2);
+  Expr *PtrArg = TheCall->getArg(3);
+
+  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
+    return ExprError();
+
+  QualType IdxTy = IdxArg->getType();
+  const VectorType *IdxVecTy = IdxTy->getAs<VectorType>();
+  if (!IdxTy->isExtVectorType() || !IdxVecTy->getElementType()->isIntegerType())
+    return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+           << 2 << /* vector of */ 4 << /* integer */ 1 << /* no fp */ 0
+           << IdxTy;
+
+  QualType ValTy = ValArg->getType();
+  QualType MaskTy = MaskArg->getType();
+  QualType PtrTy = PtrArg->getType();
+  QualType PointeeTy = PtrTy->getPointeeType();
+
+  const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
+  const VectorType *ValVecTy = ValTy->getAs<VectorType>();
+  if (MaskVecTy->getNumElements() != IdxVecTy->getNumElements())
+    return ExprError(
+        S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
+        << S.getASTContext().BuiltinInfo.getQuotedName(
+               TheCall->getBuiltinCallee())
+        << MaskTy << IdxTy);
+  if (MaskVecTy->getNumElements() != ValVecTy->getNumElements())
+    return ExprError(
+        S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
+        << S.getASTContext().BuiltinInfo.getQuotedName(
+               TheCall->getBuiltinCallee())
+        << MaskTy << ValTy);
+
+  QualType ArgTy =
+      S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());
+  if (!S.Context.hasSameType(ValTy, ArgTy))
+    return ExprError(S.Diag(TheCall->getBeginLoc(),
+                            diag::err_vec_builtin_incompatible_vector)
+                     << TheCall->getDirectCallee() << /*isMorethantwoArgs*/ 2
+                     << SourceRange(TheCall->getArg(1)->getBeginLoc(),
+                                    TheCall->getArg(1)->getEndLoc()));
+
+  TheCall->setType(S.Context.VoidTy);
+  return TheCall;
+}
+
 static ExprResult BuiltinInvoke(Sema &S, CallExpr *TheCall) {
   SourceLocation Loc = TheCall->getBeginLoc();
   MutableArrayRef Args(TheCall->getArgs(), TheCall->getNumArgs());
@@ -2619,6 +2716,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_masked_store:
   case Builtin::BI__builtin_masked_compress_store:
    return BuiltinMaskedStore(*this, TheCall);
+  case Builtin::BI__builtin_masked_gather:
+    return BuiltinMaskedGather(*this, TheCall);
+  case Builtin::BI__builtin_masked_scatter:
+    return BuiltinMaskedScatter(*this, TheCall);
   case Builtin::BI__builtin_invoke:
     return BuiltinInvoke(*this, TheCall);
   case Builtin::BI__builtin_prefetch:
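Taken together, the checks above require a boolean ext-vector mask, an integer
ext-vector index with the same lane count, and a pointer to a scalar
(non-vector) element; the result type is the pointee widened to the mask's
lane count. A sketch of well-formed calls under those rules (assumes this
patch; the float variant is illustrative, not taken from the tests):

  typedef float v4f __attribute__((ext_vector_type(4)));
  typedef int v4i __attribute__((ext_vector_type(4)));
  typedef _Bool v4b __attribute__((ext_vector_type(4)));

  /* Gather's result type is built by Sema as getExtVectorType(pointee,
     mask lanes), i.e. v4f here. */
  v4f load_lanes(v4b m, v4i i, float *base) {
    return __builtin_masked_gather(m, i, base);
  }

  /* Scatter takes (mask, index, value, pointer) and returns void. */
  void store_lanes(v4b m, v4i i, v4f v, float *base) {
    __builtin_masked_scatter(m, i, v, base);
  }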
diff --git a/clang/test/CodeGen/builtin-masked.c b/clang/test/CodeGen/builtin-masked.c
index 579cf5c413c9b..66e6d10f1f3b1 100644
--- a/clang/test/CodeGen/builtin-masked.c
+++ b/clang/test/CodeGen/builtin-masked.c
@@ -129,3 +129,61 @@ void test_store(v8b m, v8i v, v8i *p) {
 void test_compress_store(v8b m, v8i v, v8i *p) {
   __builtin_masked_compress_store(m, v, p);
 }
+
+// CHECK-LABEL: define dso_local <8 x i32> @test_gather(
+// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[MASK_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i8 [[MASK_COERCE]], ptr [[MASK]], align 1
+// CHECK-NEXT:    [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT:    [[IDX:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i1> [[MASK1]] to i8
+// CHECK-NEXT:    store i8 [[TMP1]], ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT:    store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], <8 x i32> [[TMP3]]
+// CHECK-NEXT:    [[MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 1, <8 x i1> [[TMP2]], <8 x i32> poison)
+// CHECK-NEXT:    ret <8 x i32> [[MASKED_GATHER]]
+//
+v8i test_gather(v8b mask, v8i idx, int *ptr) {
+  return __builtin_masked_gather(mask, idx, ptr);
+}
+
+// CHECK-LABEL: define dso_local void @test_scatter(
+// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP1:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[MASK_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT:    [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i8 [[MASK_COERCE]], ptr [[MASK]], align 1
+// CHECK-NEXT:    [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT:    [[VAL:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT:    [[IDX:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i1> [[MASK1]] to i8
+// CHECK-NEXT:    store i8 [[TMP2]], ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT:    store <8 x i32> [[VAL]], ptr [[VAL_ADDR]], align 32
+// CHECK-NEXT:    store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[VAL_ADDR]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], <8 x i32> [[TMP5]]
+// CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP4]], <8 x ptr> [[TMP7]], i32 1, <8 x i1> [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+void test_scatter(v8b mask, v8i val, v8i idx, int *ptr) {
+  __builtin_masked_scatter(mask, val, idx, ptr);
+}
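The CHECK lines above pin down the lowering: one getelementptr with a vector
index produces the <8 x ptr> operand for the intrinsic. In scalar terms, each
lane's address is simply base plus index (a sketch, not from the patch):

  /* Each lane of the <8 x ptr> vector built by the getelementptr in the
     CHECK lines is base + idx[i], scaled by sizeof(int). */
  void lane_pointers(int *base, const int idx[8], int *out[8]) {
    for (int i = 0; i < 8; ++i)
      out[i] = base + idx[i];
  }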
diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c
index 05c6580651964..eb0070b0276af 100644
--- a/clang/test/Sema/builtin-masked.c
+++ b/clang/test/Sema/builtin-masked.c
@@ -44,3 +44,23 @@ void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
   __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_compress_store' must have the same number of elements}}
   __builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_compress_store' must have the same type}}
 }
+
+void test_masked_gather(int *p, v8i idx, v8b mask, v2b mask2, v2b thru) {
+  __builtin_masked_gather(mask); // expected-error {{too few arguments to function call, expected 3, have 1}}
+  __builtin_masked_gather(mask, p, p, p, p, p); // expected-error {{too many arguments to function call, expected at most 4, have 6}}
+  __builtin_masked_gather(p, p, p); // expected-error {{1st argument must be a vector of boolean types (was 'int *')}}
+  __builtin_masked_gather(mask, p, p); // expected-error {{1st argument must be a vector of integer types (was 'int *')}}
+  __builtin_masked_gather(mask2, idx, p); // expected-error {{all arguments to '__builtin_masked_gather' must have the same number of elements (was 'v2b'}}
+  __builtin_masked_gather(mask, idx, p, thru); // expected-error {{4th argument must be a 'int __attribute__((ext_vector_type(8)))' (vector of 8 'int' values)}}
+  __builtin_masked_gather(mask, idx, &idx); // expected-error {{3rd argument must be a scalar pointer}}
+}
+
+void test_masked_scatter(int *p, v8i idx, v8b mask, v2b mask2, v8i val) {
+  __builtin_masked_scatter(mask); // expected-error {{too few arguments to function call, expected 4, have 1}}
+  __builtin_masked_scatter(mask, p, p, p, p, p); // expected-error {{too many arguments to function call, expected 4, have 6}}
+  __builtin_masked_scatter(p, p, p, p); // expected-error {{1st argument must be a vector of boolean types (was 'int *')}}
+  __builtin_masked_scatter(mask, p, p, p); // expected-error {{2nd argument must be a vector of integer types (was 'int *')}}
+  __builtin_masked_scatter(mask, idx, mask, p); // expected-error {{last two arguments to '__builtin_masked_scatter' must have the same type}}
+  __builtin_masked_scatter(mask, idx, val, idx); // expected-error {{3rd argument must be a scalar pointer}}
+  __builtin_masked_scatter(mask, idx, val, &idx); // expected-error {{3rd argument must be a scalar pointer}}
+}
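The second patch below replaces the hardcoded alignment operand of 1 with the
element type's natural alignment, so the intrinsics see e.g. 4 for int lanes
on common targets. A quick way to inspect the value this corresponds to at the
source level (a sketch; alignment is target-dependent):

  #include <stdalign.h>
  #include <stdio.h>

  int main(void) {
    /* Patch 2 passes the element's natural alignment (alignof(int) here)
       as the intrinsic's alignment operand instead of a conservative 1. */
    printf("gather/scatter alignment for int lanes: %zu\n",
           (size_t)alignof(int));
    return 0;
  }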
>From 6e5fdebbbf57acfe8b14dc993abcf49ae6c12fc8 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Thu, 11 Sep 2025 08:16:47 -0500
Subject: [PATCH 2/3] Alignment

---
 clang/lib/CodeGen/CGBuiltin.cpp     | 21 ++++++++++++++-------
 clang/test/CodeGen/builtin-masked.c |  6 +++---
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ef50ba8328bfd..aa80877463a9a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4304,14 +4304,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::Value *Ptr = EmitScalarExpr(E->getArg(2));
 
     llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType());
-    llvm::Type *ElemTy = CGM.getTypes().ConvertType(
-        E->getType()->getAs<VectorType>()->getElementType());
-    llvm::Value *AlignVal = llvm::ConstantInt::get(Int32Ty, 1);
+    CharUnits Align = CGM.getNaturalTypeAlignment(
+        E->getType()->getAs<VectorType>()->getElementType(), nullptr);
+    llvm::Value *AlignVal =
+        llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
 
     llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
     if (E->getNumArgs() > 3)
       PassThru = EmitScalarExpr(E->getArg(3));
 
+    llvm::Type *ElemTy = CGM.getTypes().ConvertType(
+        E->getType()->getAs<VectorType>()->getElementType());
     llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx);
 
     llvm::Value *Result;
@@ -4349,14 +4352,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_masked_scatter: {
     llvm::Value *Mask = EmitScalarExpr(E->getArg(0));
-    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-    llvm::Value *Idx = EmitScalarExpr(E->getArg(2));
+    llvm::Value *Idx = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Val = EmitScalarExpr(E->getArg(2));
     llvm::Value *Ptr = EmitScalarExpr(E->getArg(3));
+    CharUnits Align = CGM.getNaturalTypeAlignment(
+        E->getArg(2)->getType()->getAs<VectorType>()->getElementType(),
+        nullptr);
+    llvm::Value *AlignVal =
+        llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
+
     llvm::Type *ElemTy = CGM.getTypes().ConvertType(
         E->getArg(1)->getType()->getAs<VectorType>()->getElementType());
-    llvm::Value *AlignVal = llvm::ConstantInt::get(Int32Ty, 1);
-
     llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx);
 
     Function *F = CGM.getIntrinsic(Intrinsic::masked_scatter,
diff --git a/clang/test/CodeGen/builtin-masked.c b/clang/test/CodeGen/builtin-masked.c
index 66e6d10f1f3b1..adb1ad4b698ac 100644
--- a/clang/test/CodeGen/builtin-masked.c
+++ b/clang/test/CodeGen/builtin-masked.c
@@ -150,7 +150,7 @@ void test_compress_store(v8b m, v8i v, v8i *p) {
 // CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
 // CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], <8 x i32> [[TMP3]]
-// CHECK-NEXT:    [[MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 1, <8 x i1> [[TMP2]], <8 x i32> poison)
+// CHECK-NEXT:    [[MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> [[TMP2]], <8 x i32> poison)
 // CHECK-NEXT:    ret <8 x i32> [[MASKED_GATHER]]
 //
 v8i test_gather(v8b mask, v8i idx, int *ptr) {
@@ -180,8 +180,8 @@ v8i test_gather(v8b mask, v8i idx, int *ptr) {
 // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[VAL_ADDR]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32
 // CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], <8 x i32> [[TMP5]]
-// CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP4]], <8 x ptr> [[TMP7]], i32 1, <8 x i1> [[TMP3]])
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], <8 x i32> [[TMP4]]
+// CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP5]], <8 x ptr> [[TMP7]], i32 4, <8 x i1> [[TMP3]])
 // CHECK-NEXT:    ret void
 //
 void test_scatter(v8b mask, v8i val, v8i idx, int *ptr) {

>From b64ba3af9bdea040aed254019ea9e4dfec07b4bc Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Fri, 12 Sep 2025 07:42:17 -0500
Subject: [PATCH 3/3] Pure

---
 clang/include/clang/Basic/Builtins.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 97be087aa752a..218d27a010ff4 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1258,7 +1258,7 @@ def MaskedCompressStore : Builtin {
 
 def MaskedGather : Builtin {
   let Spellings = ["__builtin_masked_gather"];
-  let Attributes = [NoThrow, CustomTypeChecking];
+  let Attributes = [NoThrow, Pure, CustomTypeChecking];
   let Prototype = "void(...)";
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits