llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-hlsl

Author: Sietze Riemersma (KungFuDonkey)

<details>
<summary>Changes</summary>

This PR adds the InterlockedOr function to HLSL. A similar PR
(#<!-- -->180804) was opened last year but was never merged, so I
reimplemented it here. InterlockedAdd went in recently, which made this change
easy enough for me to do.

Also refactored the existing InterlockedAdd code paths into shared helpers so
that future Interlocked functions can reuse them.

---

Patch is 27.93 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/204923.diff


14 Files Affected:

- (modified) clang/include/clang/Basic/Builtins.td (+6) 
- (modified) clang/lib/CodeGen/CGHLSLBuiltins.cpp (+37-27) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.h (+1) 
- (modified) clang/lib/Sema/HLSLExternalSemaSource.cpp (+18) 
- (modified) clang/lib/Sema/SemaHLSL.cpp (+4-3) 
- (added) clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl (+59) 
- (added) clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl (+100) 
- (modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+4) 
- (modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+4) 
- (modified) llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp (+9-4) 
- (modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+11-8) 
- (added) llvm/test/CodeGen/DirectX/InterlockedOr.ll (+52) 
- (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedOr.ll (+36) 
- (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedOr_spv_i64.ll 
(+37) 


``````````diff
diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 053a257ba6d4a..61e63c4d9b073 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5465,6 +5465,12 @@ def HLSLInterlockedAdd : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void (...)";
 }
 
+def HLSLInterlockedOr : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or"];
+  let Attributes = [NoThrow];
+  let Prototype = "void (...)";
+}
+
 def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_wave_active_ballot"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp 
b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 20a2119e28ce1..5f184dbb91068 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -172,6 +172,35 @@ static Value *handleHlslSplitdouble(const CallExpr *E, 
CodeGenFunction *CGF) {
   return LastInst;
 }
 
+// Emit an HLSL Interlocked* atomic operation. All Interlocked* builtins share
+// the same shape, differing only in the target intrinsic:
+//   void Interlocked<Op>(groupshared|device T &dest, T value);
+//   void Interlocked<Op>(groupshared|device T &dest, T value,
+//                        T &original_value);
+// Both `dest` and `original_value` are plain references, so we can use the
+// underlying lvalue directly without HLSLOutArgExpr unwrapping.
+static Value *handleHlslInterlocked(CodeGenFunction &CGF, const CallExpr *E,
+                                    Intrinsic::ID ID, const Twine &Name) {
+  LValue DestLV = CGF.EmitLValue(E->getArg(0));
+  Value *Ptr = DestLV.getAddress().emitRawPointer(CGF);
+  Value *Val = CGF.EmitScalarExpr(E->getArg(1));
+  assert(E->getArg(1)->getType()->isIntegerType() &&
+         "Intrinsic Interlocked value operand must be an integer");
+
+  Value *Call = CGF.EmitRuntimeCall(
+      Intrinsic::getOrInsertDeclaration(&CGF.CGM.getModule(), ID,
+                                        {Val->getType(), Ptr->getType()}),
+      ArrayRef<Value *>{Ptr, Val}, Name);
+
+  // The 3-arg overload writes the old value (the intrinsic's return value)
+  // into the `original_value` reference parameter.
+  if (E->getNumArgs() == 3) {
+    LValue OrigLV = CGF.EmitLValue(E->getArg(2));
+    CGF.EmitStoreThroughLValue(RValue::get(Call), OrigLV);
+  }
+  return Call;
+}
+
 static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF,
                                          const CallExpr *E) {
   Value *Cond = CGF.EmitScalarExpr(E->getArg(0));
@@ -1427,33 +1456,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op},
                              "hlsl.wave.active.bit.and");
   }
-  case Builtin::BI__builtin_hlsl_interlocked_add: {
-    // HLSL signatures (synthesized as overloads in HLSLExternalSemaSource):
-    //   void InterlockedAdd(groupshared|device T &dest, T value);
-    //   void InterlockedAdd(groupshared|device T &dest, T value,
-    //                       T &original_value);
-    // Both `dest` and `original_value` are plain references, so we can use
-    // the underlying lvalue directly without HLSLOutArgExpr unwrapping.
-    LValue DestLV = EmitLValue(E->getArg(0));
-    Value *Ptr = DestLV.getAddress().emitRawPointer(*this);
-    Value *Val = EmitScalarExpr(E->getArg(1));
-    assert(E->getArg(1)->getType()->isIntegerType() &&
-           "Intrinsic InterlockedAdd value operand must be an integer");
-
-    Intrinsic::ID ID = CGM.getHLSLRuntime().getInterlockedAddIntrinsic();
-    Value *Call = EmitRuntimeCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID,
-                                          {Val->getType(), Ptr->getType()}),
-        ArrayRef<Value *>{Ptr, Val}, "hlsl.interlocked.add");
-
-    // The 3-arg overload writes the old value (the intrinsic's return value)
-    // into the `original_value` reference parameter.
-    if (E->getNumArgs() == 3) {
-      LValue OrigLV = EmitLValue(E->getArg(2));
-      EmitStoreThroughLValue(RValue::get(Call), OrigLV);
-    }
-    return Call;
-  }
+  case Builtin::BI__builtin_hlsl_interlocked_add:
+    return handleHlslInterlocked(
+        *this, E, CGM.getHLSLRuntime().getInterlockedAddIntrinsic(),
+        "hlsl.interlocked.add");
+  case Builtin::BI__builtin_hlsl_interlocked_or:
+    return handleHlslInterlocked(
+        *this, E, CGM.getHLSLRuntime().getInterlockedOrIntrinsic(),
+        "hlsl.interlocked.or");
   case Builtin::BI__builtin_hlsl_wave_active_ballot: {
     [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
     assert(Op->getType()->isIntegerTy(1) &&
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h 
b/clang/lib/CodeGen/CGHLSLRuntime.h
index a126d4612a5f4..154d19ff7bd25 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -152,6 +152,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitXor, wave_reduce_xor)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitAnd, wave_reduce_and)
   GENERATE_HLSL_INTRINSIC_FUNCTION(InterlockedAdd, interlocked_add)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(InterlockedOr, interlocked_or)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMax, wave_reduce_max)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveUMax, wave_reduce_umax)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMin, wave_reduce_min)
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp 
b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 3f7255cb3f8a7..537357be5e2bd 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -760,8 +760,26 @@ static void defineHLSLInterlockedAdd(Sema &S, 
NamespaceDecl *NS) {
                             ThreeArg);
 }
 
+// Synthesize the InterlockedOr overload set: {int, uint, int64_t, uint64_t}
+// x {groupshared, device} x {2-arg, 3-arg}.
+static void defineHLSLInterlockedOr(Sema &S, NamespaceDecl *NS) {
+  ASTContext &AST = S.getASTContext();
+  // HLSL: int64_t == long, uint64_t == unsigned long (see hlsl_basic_types.h).
+  QualType Elems[] = {AST.IntTy, AST.UnsignedIntTy, AST.LongTy,
+                      AST.UnsignedLongTy};
+  LangAS AddrSpaces[] = {LangAS::hlsl_groupshared, LangAS::hlsl_device};
+
+  for (QualType ElemTy : Elems)
+    for (LangAS AS : AddrSpaces)
+      for (bool ThreeArg : {false, true})
+        buildAtomicOverload(S, NS, "InterlockedOr",
+                            "__builtin_hlsl_interlocked_or", ElemTy, AS,
+                            ThreeArg);
+}
+
 void HLSLExternalSemaSource::defineHLSLAtomicIntrinsics() {
   defineHLSLInterlockedAdd(*SemaPtr, HLSLNamespace);
+  defineHLSLInterlockedOr(*SemaPtr, HLSLNamespace);
 }
 
 void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record,
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 075dc97b0aef2..e3d8e4ff22bcb 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4534,10 +4534,11 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
     TheCall->setType(ArgTyExpr);
     break;
   }
-  case Builtin::BI__builtin_hlsl_interlocked_add: {
+  case Builtin::BI__builtin_hlsl_interlocked_add:
+  case Builtin::BI__builtin_hlsl_interlocked_or: {
     // The builtin's prototype in Builtins.td is `void (...)`, so direct calls
-    // to `__builtin_hlsl_interlocked_add` bypass argument checking entirely.
-    // When reached via the synthesized `InterlockedAdd` overload set in
+    // to `__builtin_hlsl_interlocked_*` bypass argument checking entirely.
+    // When reached via the synthesized `Interlocked*` overload set in
     // HLSLExternalSemaSource, overload resolution has already enforced the
     // argument count, integer-type matching, and the address-space requirement
     // on `dest`. The checks below are a safety net for callers that invoke the
diff --git a/clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl 
b/clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl
new file mode 100644
index 0000000000000..a4c4f4cc7dd6c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/InterlockedOr.hlsl
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   dxil-pc-shadermodel6.6-compute %s -emit-llvm -disable-llvm-passes -o 
- | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,DXCHECK
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK
+
+// Test basic lowering of HLSL InterlockedOr to the target intrinsic.
+
+groupshared int  gs_i32;
+groupshared uint gs_u32;
+groupshared int64_t  gs_i64;
+groupshared uint64_t gs_u64;
+
+// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func 
)*}}void @{{.*}}test_int_2arg
+// DXCHECK:  call i32 @llvm.dx.interlocked.or.i32.p3(ptr addrspace(3) 
{{.*}}@gs_i32{{.*}}, i32 %{{.*}})
+// SPVCHECK: call spir_func i32 @llvm.spv.interlocked.or.i32.p3(ptr 
addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}})
+export void test_int_2arg(int v) {
+  InterlockedOr(gs_i32, v);
+}
+
+// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func 
)*}}void @{{.*}}test_uint_2arg
+// DXCHECK:  call i32 @llvm.dx.interlocked.or.i32.p3(ptr addrspace(3) 
{{.*}}@gs_u32{{.*}}, i32 %{{.*}})
+// SPVCHECK: call spir_func i32 @llvm.spv.interlocked.or.i32.p3(ptr 
addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}})
+export void test_uint_2arg(uint v) {
+  InterlockedOr(gs_u32, v);
+}
+
+// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func 
)*}}void @{{.*}}test_int_3arg
+// DXCHECK:  %[[R:.*]] = call i32 @llvm.dx.interlocked.or.i32.p3(ptr 
addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}})
+// SPVCHECK: %[[R:.*]] = call spir_func i32 
@llvm.spv.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 
%{{.*}})
+// CHECK:    store i32 %[[R]], ptr {{.*}}
+export void test_int_3arg(int v, out int orig) {
+  InterlockedOr(gs_i32, v, orig);
+}
+
+// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func 
)*}}void @{{.*}}test_uint_3arg
+// DXCHECK:  %[[R:.*]] = call i32 @llvm.dx.interlocked.or.i32.p3(ptr 
addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}})
+// SPVCHECK: %[[R:.*]] = call spir_func i32 
@llvm.spv.interlocked.or.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 
%{{.*}})
+// CHECK:    store i32 %[[R]], ptr {{.*}}
+export void test_uint_3arg(uint v, out uint orig) {
+  InterlockedOr(gs_u32, v, orig);
+}
+
+// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func 
)*}}void @{{.*}}test_int64_2arg
+// DXCHECK:  call i64 @llvm.dx.interlocked.or.i64.p3(ptr addrspace(3) 
{{.*}}@gs_i64{{.*}}, i64 %{{.*}})
+// SPVCHECK: call spir_func i64 @llvm.spv.interlocked.or.i64.p3(ptr 
addrspace(3) {{.*}}@gs_i64{{.*}}, i64 %{{.*}})
+export void test_int64_2arg(int64_t v) {
+  InterlockedOr(gs_i64, v);
+}
+
+// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func 
)*}}void @{{.*}}test_uint64_3arg
+// DXCHECK:  %[[R:.*]] = call i64 @llvm.dx.interlocked.or.i64.p3(ptr 
addrspace(3) {{.*}}@gs_u64{{.*}}, i64 %{{.*}})
+// SPVCHECK: %[[R:.*]] = call spir_func i64 
@llvm.spv.interlocked.or.i64.p3(ptr addrspace(3) {{.*}}@gs_u64{{.*}}, i64 
%{{.*}})
+// CHECK:    store i64 %[[R]], ptr {{.*}}
+export void test_uint64_3arg(uint64_t v, out uint64_t orig) {
+  InterlockedOr(gs_u64, v, orig);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl 
b/clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl
new file mode 100644
index 0000000000000..faa2825139ad4
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/InterlockedOr-errors.hlsl
@@ -0,0 +1,100 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \
+// RUN:   -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \
+// RUN:   -disable-llvm-passes -verify
+
+// InterlockedOr is provided as a set of address-space-qualified overloads
+// (groupshared/device, {int,uint,int64_t,uint64_t}, 2-arg/3-arg). All arg
+// mismatches surface as "no matching function" with 16 candidates. The
+// candidate notes come from synthesized FunctionDecls with no source
+// location, so they are matched with `@*:*`.
+
+groupshared int   gs_i32;
+groupshared float gs_f32;
+struct S { int x; };
+groupshared S     gs_s;
+
+void too_few(int v) {
+  InterlockedOr(gs_i32); // expected-error{{no matching function for call to 
'InterlockedOr'}}
+  // expected-note@*:* 16 {{candidate function}}
+}
+
+void too_many(int v, int extra) {
+  int o;
+  InterlockedOr(gs_i32, v, o, extra); // expected-error{{no matching function 
for call to 'InterlockedOr'}}
+  // expected-note@*:* 16 {{candidate function}}
+}
+
+// Atomics must operate on actual addresses in groupshared or device memory;
+// passing a plain local (no address space) must not bind to any overload.
+void local_dest(int v) {
+  int dest;
+  InterlockedOr(dest, v); // expected-error{{no matching function for call to 
'InterlockedOr'}}
+  // expected-note@*:* 16 {{candidate function}}
+}
+
+void float_dest(float v) {
+  InterlockedOr(gs_f32, v); // expected-error{{no matching function for call 
to 'InterlockedOr'}}
+  // expected-note@*:* 16 {{candidate function}}
+}
+
+void struct_dest(int v) {
+  InterlockedOr(gs_s, v); // expected-error{{no matching function for call to 
'InterlockedOr'}}
+  // expected-note@*:* 16 {{candidate function}}
+}
+
+void mismatched_orig_type(int v) {
+  uint orig;
+  InterlockedOr(gs_i32, v, orig); // expected-error{{no matching function for 
call to 'InterlockedOr'}}
+  // expected-note@*:* 16 {{candidate function}}
+}
+
+// The tests below exercise direct invocations of the underlying clang builtin
+// `__builtin_hlsl_interlocked_or`. These bypass overload resolution against
+// the synthesized `InterlockedOr` overload set (the builtin's prototype in
+// Builtins.td is `void (...)`), so each error is produced by the explicit
+// checks in SemaHLSL.cpp rather than by candidate-set rejection.
+
+void direct_too_few() {
+  __builtin_hlsl_interlocked_or(gs_i32);
+  // expected-error@-1 {{too few arguments to function call, expected at least 
2, have 1}}
+}
+
+void direct_too_many(int v, int extra) {
+  int o;
+  __builtin_hlsl_interlocked_or(gs_i32, v, o, extra);
+  // expected-error@-1 {{too many arguments to function call, expected at most 
3, have 4}}
+}
+
+void direct_non_integer_dest() {
+  S local_s;
+  __builtin_hlsl_interlocked_or(local_s, 1);
+  // expected-error@-1 {{1st argument must be a scalar integer type (was 'S')}}
+}
+
+void direct_nonlvalue_dest(int v) {
+  __builtin_hlsl_interlocked_or(1, v);
+  // expected-error@-1 {{cannot bind non-lvalue argument '1' to out parameter}}
+}
+
+void direct_mismatched_value() {
+  uint uv = 1u;
+  __builtin_hlsl_interlocked_or(gs_i32, uv);
+  // expected-error@-1 {{passing 'uint' (aka 'unsigned int') to parameter of 
incompatible type 'int'}}
+}
+
+void direct_mismatched_orig(int v) {
+  uint orig;
+  __builtin_hlsl_interlocked_or(gs_i32, v, orig);
+  // expected-error@-1 {{passing 'uint' (aka 'unsigned int') to parameter of 
incompatible type 'int'}}
+}
+
+void direct_nonlvalue_orig(int v) {
+  __builtin_hlsl_interlocked_or(gs_i32, v, 1);
+  // expected-error@-1 {{cannot bind non-lvalue argument '1' to out parameter}}
+}
+
+void direct_default_as_dest(int v) {
+  int local;
+  __builtin_hlsl_interlocked_or(local, v);
+  // expected-error@-1 {{1st argument to atomic builtin must reference 
groupshared or device memory (was 'int')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td 
b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index af360dfc78965..d2db4905aeabe 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -261,6 +261,10 @@ def int_dx_interlocked_add :
     DefaultAttrsIntrinsic<[llvm_anyint_ty],
                           [llvm_anyptr_ty, LLVMMatchType<0>],
                           [IntrArgMemOnly]>;
+def int_dx_interlocked_or :
+    DefaultAttrsIntrinsic<[llvm_anyint_ty],
+                          [llvm_anyptr_ty, LLVMMatchType<0>],
+                          [IntrArgMemOnly]>;
 def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], 
[LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>;
 def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], 
[LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>;
 def int_dx_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], 
[LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td 
b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 6e4cf8f7e72dc..5c59a32ddce99 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -152,6 +152,10 @@ def int_spv_rsqrt : 
DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
       DefaultAttrsIntrinsic<[llvm_anyint_ty],
                             [llvm_anyptr_ty, LLVMMatchType<0>],
                             [IntrArgMemOnly]>;
+  def int_spv_interlocked_or :
+      DefaultAttrsIntrinsic<[llvm_anyint_ty],
+                            [llvm_anyptr_ty, LLVMMatchType<0>],
+                            [IntrArgMemOnly]>;
   def int_spv_subgroup_ballot : 
ClangBuiltin<"__builtin_spirv_subgroup_ballot">,
     DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, 
IntrNoMem]>;
   def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], 
[LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp 
b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 88eda6656d89b..62fb8d1b12891 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -228,6 +228,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_step:
   case Intrinsic::dx_radians:
   case Intrinsic::dx_interlocked_add:
+  case Intrinsic::dx_interlocked_or:
   case Intrinsic::usub_sat:
   case Intrinsic::vector_reduce_add:
   case Intrinsic::vector_reduce_fadd:
@@ -771,15 +772,16 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
   return Builder.CreateFMul(X, PiOver180);
 }
 
-static Value *expandInterlockedAddIntrinsic(CallInst *Orig) {
-  // Lower @llvm.dx.interlocked.add(ptr, val) to `atomicrmw add ptr, val
+static Value *expandInterlockedIntrinsic(CallInst *Orig,
+                                         AtomicRMWInst::BinOp Op) {
+  // Lower @llvm.dx.interlocked.<op>(ptr, val) to `atomicrmw <op> ptr, val
   // monotonic`. HLSL Interlocked operations imply no fence/barrier, which maps
   // to monotonic ordering. The instruction's result is the old value, matching
   // the intrinsic's return value.
   Value *Ptr = Orig->getArgOperand(0);
   Value *Val = Orig->getArgOperand(1);
   IRBuilder<> Builder(Orig);
-  return Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, MaybeAlign(),
+  return Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::Monotonic);
 }
 
@@ -1245,7 +1247,10 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) 
{
     Result = expandRadiansIntrinsic(Orig);
     break;
   case Intrinsic::dx_interlocked_add:
-    Result = expandInterlockedAddIntrinsic(Orig);
+    Result = expandInterlockedIntrinsic(Orig, AtomicRMWInst::Add);
+    break;
+  case Intrinsic::dx_interlocked_or:
+    Result = expandInterlockedIntrinsic(Orig, AtomicRMWInst::Or);
     break;
   case Intrinsic::dx_resource_load_rawbuffer:
     if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true))
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp 
b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index cd99015a61ba9..2220fc72e3837 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -177,8 +177,8 @@ class SPIRVInstructionSelector : public InstructionSelector 
{
   bool selectAtomicRMW(Register ResVReg, SPIRVTypeInst ResType, MachineInstr 
&I,
                        unsigned NewOpcode, unsigned NegateOpcode = 0) const;
 
-  bool selectInterlockedAdd(Register ResVReg, SPIRVTypeInst ResType,
-                            MachineInstr &I) const;
+  bool selectInterlocked(Register ResVReg, SPIRVTypeInst ResType,
+                         MachineInstr &I, u...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/204923
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to