llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-hlsl Author: Sietze Riemersma (KungFuDonkey) <details> <summary>Changes</summary> This PR adds the InterlockedOr function to HLSL. A similar PR for this, #<!-- -->180804, was opened last year but never merged, so I reimplemented it now that InterlockedAdd has landed, which made the change easy enough for me to do. I also made the shared Interlocked code reusable for future Interlocked functions. --- Patch is 27.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/204923.diff 14 Files Affected: - (modified) clang/include/clang/Basic/Builtins.td (+6) - (modified) clang/lib/CodeGen/CGHLSLBuiltins.cpp (+37-27) - (modified) clang/lib/CodeGen/CGHLSLRuntime.h (+1) - (modified) clang/lib/Sema/HLSLExternalSemaSource.cpp (+18) - (modified) clang/lib/Sema/SemaHLSL.cpp (+4-3) - (added) clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl (+59) - (added) clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl (+100) - (modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+4) - (modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+4) - (modified) llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp (+9-4) - (modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+11-8) - (added) llvm/test/CodeGen/DirectX/InterlockedOr.ll (+52) - (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedOr.ll (+36) - (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedOr_spv_i64.ll (+37) ``````````diff diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 053a257ba6d4a..61e63c4d9b073 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5465,6 +5465,12 @@ def HLSLInterlockedAdd : LangBuiltin<"HLSL_LANG"> { let Prototype = "void (...)"; } +def HLSLInterlockedOr : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_interlocked_or"]; + let Attributes = [NoThrow]; + let Prototype = "void 
(...)"; +} + def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_wave_active_ballot"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 20a2119e28ce1..5f184dbb91068 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -172,6 +172,35 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { return LastInst; } +// Emit an HLSL Interlocked* atomic operation. All Interlocked* builtins share +// the same shape, differing only in the target intrinsic: +// void Interlocked<Op>(groupshared|device T &dest, T value); +// void Interlocked<Op>(groupshared|device T &dest, T value, +// T &original_value); +// Both `dest` and `original_value` are plain references, so we can use the +// underlying lvalue directly without HLSLOutArgExpr unwrapping. +static Value *handleHlslInterlocked(CodeGenFunction &CGF, const CallExpr *E, + Intrinsic::ID ID, const Twine &Name) { + LValue DestLV = CGF.EmitLValue(E->getArg(0)); + Value *Ptr = DestLV.getAddress().emitRawPointer(CGF); + Value *Val = CGF.EmitScalarExpr(E->getArg(1)); + assert(E->getArg(1)->getType()->isIntegerType() && + "Intrinsic Interlocked value operand must be an integer"); + + Value *Call = CGF.EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGF.CGM.getModule(), ID, + {Val->getType(), Ptr->getType()}), + ArrayRef<Value *>{Ptr, Val}, Name); + + // The 3-arg overload writes the old value (the intrinsic's return value) + // into the `original_value` reference parameter. 
+ if (E->getNumArgs() == 3) { + LValue OrigLV = CGF.EmitLValue(E->getArg(2)); + CGF.EmitStoreThroughLValue(RValue::get(Call), OrigLV); + } + return Call; +} + static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF, const CallExpr *E) { Value *Cond = CGF.EmitScalarExpr(E->getArg(0)); @@ -1427,33 +1456,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op}, "hlsl.wave.active.bit.and"); } - case Builtin::BI__builtin_hlsl_interlocked_add: { - // HLSL signatures (synthesized as overloads in HLSLExternalSemaSource): - // void InterlockedAdd(groupshared|device T &dest, T value); - // void InterlockedAdd(groupshared|device T &dest, T value, - // T &original_value); - // Both `dest` and `original_value` are plain references, so we can use - // the underlying lvalue directly without HLSLOutArgExpr unwrapping. - LValue DestLV = EmitLValue(E->getArg(0)); - Value *Ptr = DestLV.getAddress().emitRawPointer(*this); - Value *Val = EmitScalarExpr(E->getArg(1)); - assert(E->getArg(1)->getType()->isIntegerType() && - "Intrinsic InterlockedAdd value operand must be an integer"); - - Intrinsic::ID ID = CGM.getHLSLRuntime().getInterlockedAddIntrinsic(); - Value *Call = EmitRuntimeCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID, - {Val->getType(), Ptr->getType()}), - ArrayRef<Value *>{Ptr, Val}, "hlsl.interlocked.add"); - - // The 3-arg overload writes the old value (the intrinsic's return value) - // into the `original_value` reference parameter. 
- if (E->getNumArgs() == 3) { - LValue OrigLV = EmitLValue(E->getArg(2)); - EmitStoreThroughLValue(RValue::get(Call), OrigLV); - } - return Call; - } + case Builtin::BI__builtin_hlsl_interlocked_add: + return handleHlslInterlocked( + *this, E, CGM.getHLSLRuntime().getInterlockedAddIntrinsic(), + "hlsl.interlocked.add"); + case Builtin::BI__builtin_hlsl_interlocked_or: + return handleHlslInterlocked( + *this, E, CGM.getHLSLRuntime().getInterlockedOrIntrinsic(), + "hlsl.interlocked.or"); case Builtin::BI__builtin_hlsl_wave_active_ballot: { [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0)); assert(Op->getType()->isIntegerTy(1) && diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index a126d4612a5f4..154d19ff7bd25 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -152,6 +152,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitXor, wave_reduce_xor) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitAnd, wave_reduce_and) GENERATE_HLSL_INTRINSIC_FUNCTION(InterlockedAdd, interlocked_add) + GENERATE_HLSL_INTRINSIC_FUNCTION(InterlockedOr, interlocked_or) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMax, wave_reduce_max) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveUMax, wave_reduce_umax) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMin, wave_reduce_min) diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 3f7255cb3f8a7..537357be5e2bd 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -760,8 +760,26 @@ static void defineHLSLInterlockedAdd(Sema &S, NamespaceDecl *NS) { ThreeArg); } +// Synthesize the InterlockedOr overload set: {int, uint, int64_t, uint64_t} +// x {groupshared, device} x {2-arg, 3-arg}. 
+static void defineHLSLInterlockedOr(Sema &S, NamespaceDecl *NS) { + ASTContext &AST = S.getASTContext(); + // HLSL: int64_t == long, uint64_t == unsigned long (see hlsl_basic_types.h). + QualType Elems[] = {AST.IntTy, AST.UnsignedIntTy, AST.LongTy, + AST.UnsignedLongTy}; + LangAS AddrSpaces[] = {LangAS::hlsl_groupshared, LangAS::hlsl_device}; + + for (QualType ElemTy : Elems) + for (LangAS AS : AddrSpaces) + for (bool ThreeArg : {false, true}) + buildAtomicOverload(S, NS, "InterlockedOr", + "__builtin_hlsl_interlocked_or", ElemTy, AS, + ThreeArg); +} + void HLSLExternalSemaSource::defineHLSLAtomicIntrinsics() { defineHLSLInterlockedAdd(*SemaPtr, HLSLNamespace); + defineHLSLInterlockedOr(*SemaPtr, HLSLNamespace); } void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record, diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 075dc97b0aef2..e3d8e4ff22bcb 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -4534,10 +4534,11 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { TheCall->setType(ArgTyExpr); break; } - case Builtin::BI__builtin_hlsl_interlocked_add: { + case Builtin::BI__builtin_hlsl_interlocked_add: + case Builtin::BI__builtin_hlsl_interlocked_or: { // The builtin's prototype in Builtins.td is `void (...)`, so direct calls - // to `__builtin_hlsl_interlocked_add` bypass argument checking entirely. - // When reached via the synthesized `InterlockedAdd` overload set in + // to `__builtin_hlsl_interlocked_*` bypass argument checking entirely. + // When reached via the synthesized `Interlocked*` overload set in // HLSLExternalSemaSource, overload resolution has already enforced the // argument count, integer-type matching, and the address-space requirement // on `dest`. 
The checks below are a safety net for callers that invoke the diff --git a/clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl b/clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl new file mode 100644 index 0000000000000..a4c4f4cc7dd6c --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.6-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,DXCHECK + +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ +// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK + +// Test basic lowering of HLSL InterlockedOr to the target intrinsic. + +groupshared int gs_i32; +groupshared uint gs_u32; +groupshared int64_t gs_i64; +groupshared uint64_t gs_u64; + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_int_2arg +// DXCHECK: call i32 @llvm.dx.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +// SPVCHECK: call spir_func i32 @llvm.spv.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +export void test_int_2arg(int v) { + InterlockedOr(gs_i32, v); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_uint_2arg +// DXCHECK: call i32 @llvm.dx.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +// SPVCHECK: call spir_func i32 @llvm.spv.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +export void test_uint_2arg(uint v) { + InterlockedOr(gs_u32, v); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_int_3arg +// DXCHECK: %[[R:.*]] = call i32 @llvm.dx.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +// SPVCHECK: %[[R:.*]] = call spir_func i32 
@llvm.spv.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +// CHECK: store i32 %[[R]], ptr {{.*}} +export void test_int_3arg(int v, out int orig) { + InterlockedOr(gs_i32, v, orig); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_uint_3arg +// DXCHECK: %[[R:.*]] = call i32 @llvm.dx.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +// SPVCHECK: %[[R:.*]] = call spir_func i32 @llvm.spv.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +// CHECK: store i32 %[[R]], ptr {{.*}} +export void test_uint_3arg(uint v, out uint orig) { + InterlockedOr(gs_u32, v, orig); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_int64_2arg +// DXCHECK: call i64 @llvm.dx.interlocked.or.i64.p3(ptr addrspace(3) {{.*}}@gs_i64{{.*}}, i64 %{{.*}}) +// SPVCHECK: call spir_func i64 @llvm.spv.interlocked.or.i64.p3(ptr addrspace(3) {{.*}}@gs_i64{{.*}}, i64 %{{.*}}) +export void test_int64_2arg(int64_t v) { + InterlockedOr(gs_i64, v); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_uint64_3arg +// DXCHECK: %[[R:.*]] = call i64 @llvm.dx.interlocked.or.i64.p3(ptr addrspace(3) {{.*}}@gs_u64{{.*}}, i64 %{{.*}}) +// SPVCHECK: %[[R:.*]] = call spir_func i64 @llvm.spv.interlocked.or.i64.p3(ptr addrspace(3) {{.*}}@gs_u64{{.*}}, i64 %{{.*}}) +// CHECK: store i64 %[[R]], ptr {{.*}} +export void test_uint64_3arg(uint64_t v, out uint64_t orig) { + InterlockedOr(gs_u64, v, orig); +} diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl new file mode 100644 index 0000000000000..faa2825139ad4 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ +// RUN: -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \ 
+// RUN: -disable-llvm-passes -verify + +// InterlockedOr is provided as a set of address-space-qualified overloads +// (groupshared/device, {int,uint,int64_t,uint64_t}, 2-arg/3-arg). All arg +// mismatches surface as "no matching function" with 16 candidates. The +// candidate notes come from synthesized FunctionDecls with no source +// location, so they are matched with `@*:*`. + +groupshared int gs_i32; +groupshared float gs_f32; +struct S { int x; }; +groupshared S gs_s; + +void too_few(int v) { + InterlockedOr(gs_i32); // expected-error{{no matching function for call to 'InterlockedOr'}} + // expected-note@*:* 16 {{candidate function}} +} + +void too_many(int v, int extra) { + int o; + InterlockedOr(gs_i32, v, o, extra); // expected-error{{no matching function for call to 'InterlockedOr'}} + // expected-note@*:* 16 {{candidate function}} +} + +// Atomics must operate on actual addresses in groupshared or device memory; +// passing a plain local (no address space) must not bind to any overload. +void local_dest(int v) { + int dest; + InterlockedOr(dest, v); // expected-error{{no matching function for call to 'InterlockedOr'}} + // expected-note@*:* 16 {{candidate function}} +} + +void float_dest(float v) { + InterlockedOr(gs_f32, v); // expected-error{{no matching function for call to 'InterlockedOr'}} + // expected-note@*:* 16 {{candidate function}} +} + +void struct_dest(int v) { + InterlockedOr(gs_s, v); // expected-error{{no matching function for call to 'InterlockedOr'}} + // expected-note@*:* 16 {{candidate function}} +} + +void mismatched_orig_type(int v) { + uint orig; + InterlockedOr(gs_i32, v, orig); // expected-error{{no matching function for call to 'InterlockedOr'}} + // expected-note@*:* 16 {{candidate function}} +} + +// The tests below exercise direct invocations of the underlying clang builtin +// `__builtin_hlsl_interlocked_or`. 
These bypass overload resolution against +// the synthesized `InterlockedOr` overload set (the builtin's prototype in +// Builtins.td is `void (...)`), so each error is produced by the explicit +// checks in SemaHLSL.cpp rather than by candidate-set rejection. + +void direct_too_few() { + __builtin_hlsl_interlocked_or(gs_i32); + // expected-error@-1 {{too few arguments to function call, expected at least 2, have 1}} +} + +void direct_too_many(int v, int extra) { + int o; + __builtin_hlsl_interlocked_or(gs_i32, v, o, extra); + // expected-error@-1 {{too many arguments to function call, expected at most 3, have 4}} +} + +void direct_non_integer_dest() { + S local_s; + __builtin_hlsl_interlocked_or(local_s, 1); + // expected-error@-1 {{1st argument must be a scalar integer type (was 'S')}} +} + +void direct_nonlvalue_dest(int v) { + __builtin_hlsl_interlocked_or(1, v); + // expected-error@-1 {{cannot bind non-lvalue argument '1' to out parameter}} +} + +void direct_mismatched_value() { + uint uv = 1u; + __builtin_hlsl_interlocked_or(gs_i32, uv); + // expected-error@-1 {{passing 'uint' (aka 'unsigned int') to parameter of incompatible type 'int'}} +} + +void direct_mismatched_orig(int v) { + uint orig; + __builtin_hlsl_interlocked_or(gs_i32, v, orig); + // expected-error@-1 {{passing 'uint' (aka 'unsigned int') to parameter of incompatible type 'int'}} +} + +void direct_nonlvalue_orig(int v) { + __builtin_hlsl_interlocked_or(gs_i32, v, 1); + // expected-error@-1 {{cannot bind non-lvalue argument '1' to out parameter}} +} + +void direct_default_as_dest(int v) { + int local; + __builtin_hlsl_interlocked_or(local, v); + // expected-error@-1 {{1st argument to atomic builtin must reference groupshared or device memory (was 'int')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index af360dfc78965..d2db4905aeabe 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ 
-261,6 +261,10 @@ def int_dx_interlocked_add : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, LLVMMatchType<0>], [IntrArgMemOnly]>; +def int_dx_interlocked_or : + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [llvm_anyptr_ty, LLVMMatchType<0>], + [IntrArgMemOnly]>; def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 6e4cf8f7e72dc..5c59a32ddce99 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -152,6 +152,10 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, LLVMMatchType<0>], [IntrArgMemOnly]>; + def int_spv_interlocked_or : + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [llvm_anyptr_ty, LLVMMatchType<0>], + [IntrArgMemOnly]>; def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">, DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 88eda6656d89b..62fb8d1b12891 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -228,6 +228,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_step: case Intrinsic::dx_radians: case Intrinsic::dx_interlocked_add: + case 
Intrinsic::dx_interlocked_or: case Intrinsic::usub_sat: case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_fadd: @@ -771,15 +772,16 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, PiOver180); } -static Value *expandInterlockedAddIntrinsic(CallInst *Orig) { - // Lower @llvm.dx.interlocked.add(ptr, val) to `atomicrmw add ptr, val +static Value *expandInterlockedIntrinsic(CallInst *Orig, + AtomicRMWInst::BinOp Op) { + // Lower @llvm.dx.interlocked.<op>(ptr, val) to `atomicrmw <op> ptr, val // monotonic`. HLSL Interlocked operations imply no fence/barrier, which maps // to monotonic ordering. The instruction's result is the old value, matching // the intrinsic's return value. Value *Ptr = Orig->getArgOperand(0); Value *Val = Orig->getArgOperand(1); IRBuilder<> Builder(Orig); - return Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, MaybeAlign(), + return Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(), AtomicOrdering::Monotonic); } @@ -1245,7 +1247,10 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { Result = expandRadiansIntrinsic(Orig); break; case Intrinsic::dx_interlocked_add: - Result = expandInterlockedAddIntrinsic(Orig); + Result = expandInterlockedIntrinsic(Orig, AtomicRMWInst::Add); + break; + case Intrinsic::dx_interlocked_or: + Result = expandInterlockedIntrinsic(Orig, AtomicRMWInst::Or); break; case Intrinsic::dx_resource_load_rawbuffer: if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true)) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index cd99015a61ba9..2220fc72e3837 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -177,8 +177,8 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectAtomicRMW(Register ResVReg, SPIRVTypeInst ResType, MachineInstr &I, unsigned NewOpcode, unsigned NegateOpcode = 0) const; - bool 
selectInterlockedAdd(Register ResVReg, SPIRVTypeInst ResType, - MachineInstr &I) const; + bool selectInterlocked(Register ResVReg, SPIRVTypeInst ResType, + MachineInstr &I, u... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/204923 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
