https://github.com/Alexander-Johnston updated 
https://github.com/llvm/llvm-project/pull/180804

>From 687db9e025d2269743a7a654a65897e0ce2fec72 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <[email protected]>
Date: Tue, 10 Feb 2026 18:01:11 +0000
Subject: [PATCH 1/2] [HLSL][DXIL] InterlockedOr and Interlocked64 builtins

This includes the first phase of implementation of the InterlockedOr intrinsic.
This covers the usage of the intrinsic/builtin on RWByteAddressBuffers, Typed
Buffers, and Structured Buffers. Not covered are textures, groupshared memory,
and the standalone InterlockedOr(buf[index], val, ret) intrinsics.

SPIR-V implementation is not covered in this commit.
---
 clang/include/clang/Basic/Builtins.td         |  24 +++
 .../clang/Basic/DiagnosticSemaKinds.td        |   3 +
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 128 +++++++++++++
 clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp |  51 +++++
 clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h   |   2 +
 clang/lib/Sema/HLSLExternalSemaSource.cpp     |   2 +
 clang/lib/Sema/SemaHLSL.cpp                   | 179 ++++++++++++++++++
 .../builtins/Interlocked-or-builtin.hlsl      |  76 ++++++++
 .../CodeGenHLSL/builtins/Interlocked-or.hlsl  |  98 ++++++++++
 .../BuiltIns/interlocked-or-errors.hlsl       |  84 ++++++++
 .../BuiltIns/interlocked-or64-errors.hlsl     |  74 ++++++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   3 +
 llvm/lib/Target/DirectX/DXIL.td               |   8 +
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    |  60 ++++++
 llvm/test/CodeGen/DirectX/interlocked-or.ll   | 117 ++++++++++++
 llvm/test/CodeGen/DirectX/interlocked-or64.ll | 117 ++++++++++++
 16 files changed, 1026 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/interlocked-or.ll
 create mode 100644 llvm/test/CodeGen/DirectX/interlocked-or64.ll

diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 05e3af4a0e96f..374ff6470d91e 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5379,6 +5379,30 @@ def HLSLDdyFine : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLInterlockedOr : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLInterlockedOrRet : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or_ret"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLInterlockedOr64 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or64"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLInterlockedOrRet64 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or_ret64"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f999c362307af..384611a97dee3 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13492,6 +13492,9 @@ def err_hlsl_assign_to_global_resource: Error<
 def err_hlsl_push_constant_unique
     : Error<"cannot have more than one push constant block">;
 
+def err_hlsl_intrinsic_in_wrong_shader_model: Error<
+  "intrinsic %0 requires shader model %1 or greater">;
+
 // Layout randomization diagnostics.
 def err_non_designated_init_used : Error<
   "a randomized struct can only be initialized with a designated initializer">;
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp 
b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index c72eef1982e9e..39d716bea91bf 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -300,6 +300,122 @@ static Value *handleElementwiseF32ToF16(CodeGenFunction 
&CGF,
   llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
 }
 
+// TODO: Find a more suitable shared location for this enum.
+// AtomicBinOp uses an i32 to determine the operation mode as follows
+enum AtomicOperationCode : uint {
+  Add = 0,
+  And = 1,
+  Or = 2,
+  Xor = 3,
+  IMin = 4,
+  IMax = 5,
+  UMin = 6,
+  UMax = 7,
+  Exchange = 8
+};
+
+static Value *handleAtomicBinOp(CodeGenFunction &CGF, const CallExpr *E,
+                                const AtomicOperationCode OpCode,
+                                const bool HasReturn, const bool Is32Bit) {
+  Value *HandleOp = CGF.EmitScalarExpr(E->getArg(0));
+  Value *IndexOp = CGF.EmitScalarExpr(E->getArg(1));
+  Value *StructuredBufIndexOp;
+  Value *NewValueOp;
+  Value *OldValueOp;
+  unsigned OldValueArgIdx;
+  if (E->getNumArgs() == 3) {
+    // (handle, index, newValue)
+    NewValueOp = CGF.EmitScalarExpr(E->getArg(2));
+  } else if (E->getNumArgs() == 4) {
+    if (HasReturn) {
+      // (handle, index, newValue, oldValue)
+      NewValueOp = CGF.EmitScalarExpr(E->getArg(2));
+      OldValueArgIdx = 3;
+    } else {
+      // (handle, index, index, newValue)
+      StructuredBufIndexOp = CGF.EmitScalarExpr(E->getArg(2));
+      NewValueOp = CGF.EmitScalarExpr(E->getArg(3));
+    }
+  } else {
+    // (handle, index, index, newValue, oldValue)
+    StructuredBufIndexOp = CGF.EmitScalarExpr(E->getArg(2));
+    NewValueOp = CGF.EmitScalarExpr(E->getArg(3));
+    OldValueArgIdx = 4;
+  }
+
+  switch (CGF.CGM.getTarget().getTriple().getArch()) {
+  case llvm::Triple::dxil: {
+    QualType HandleTy = E->getArg(0)->getType();
+    const HLSLAttributedResourceType *ResourceTy =
+        HandleTy->getAs<HLSLAttributedResourceType>();
+
+    // AtomicBinOp uses an i32 to determine the operation mode as follows
+    // Add: 0, And: 1, Or: 2, Xor: 3, IMin: 4, IMax: 5, UMin: 6, UMax: 7,
+    // Exchange: 8
+    Value *ModeConstant = ConstantInt::get(CGF.Int32Ty, OpCode);
+
+    // AtomicBinOp has 3 coordinate params which must be handled differently
+    // depending on the resource type being accessed.
+    // Initially undef all the coordinates then fill as required
+    Value *Undef = UndefValue::get(CGF.Int32Ty);
+    Value *C0 = Undef;
+    Value *C1 = Undef;
+    Value *C2 = Undef;
+    if (!ResourceTy->getAttrs().RawBuffer) {
+      assert(
+          (ResourceTy->getContainedType() == CGF.getContext().IntTy ||
+           ResourceTy->getContainedType() == CGF.getContext().UnsignedIntTy) &&
+          "AtomicBinOp RWBuffer must contain int or uint");
+      // RWBuffer: c0
+      C0 = IndexOp;
+
+      // RWByteAddressBuffers are output as char8_t, but as that isn't
+      // recognized by HLSL we can't use it as an attribute to define them in
+      // tests, so must also check for char ([[hlsl::contained_type(char)]])
+    } else if (ResourceTy->getContainedType() == CGF.getContext().Char8Ty ||
+               ResourceTy->getContainedType() == CGF.getContext().CharTy) {
+      // RWByteAddressBuffer: c0
+      C0 = IndexOp;
+    } else {
+      // RWStructuredBuffer: c0 and c1
+      C0 = IndexOp;
+      C1 = StructuredBufIndexOp;
+    }
+    assert(C0 != Undef && "Failed to identify coordinates for Interlocked");
+    // TODO: Add coordinate logic for texture and groupshared
+
+    // atomicBinOp
+    // opcode, handle, binary operation code, coordinates c0, c1, c2, new val
+    if (Is32Bit) {
+      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop;
+      OldValueOp = CGF.Builder.CreateIntrinsic(
+          /*ReturnType=*/CGF.Int32Ty, ID,
+          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
+          nullptr, "hlsl.interlocked.or");
+    } else {
+      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop64;
+      OldValueOp = CGF.Builder.CreateIntrinsic(
+          /*ReturnType=*/CGF.Int64Ty, ID,
+          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
+          nullptr, "hlsl.interlocked.or");
+    }
+    break;
+  }
+  default:
+    llvm_unreachable(
+        "Interlocked intrinsic not supported by target architecture");
+  }
+
+  // Destination may or may not be provided
+  // If it is provided create a store to it
+  if (HasReturn) {
+    LValue DestOp = CGF.EmitLValue(E->getArg(OldValueArgIdx));
+    return CGF.Builder.CreateStore(OldValueOp, DestOp.getAddress());
+  } else {
+    return OldValueOp;
+  }
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -1181,6 +1297,18 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     llvm::Value *Args[] = {SpecId, DefaultVal};
     return Builder.CreateCall(SpecConstantFn, Args);
   }
+  case Builtin::BI__builtin_hlsl_interlocked_or: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, true);
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or64: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, false);
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, true);
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, false);
+  }
   }
   return nullptr;
 }
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp 
b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index 1dd7fd6fac455..25c913625b951 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -22,6 +22,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaHLSL.h"
@@ -1605,5 +1606,55 @@ 
BuiltinTypeDeclBuilder::addGetDimensionsMethodForBuffer() {
       .finalize();
 }
 
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addInterlockedMethodsForBuffer() {
+  using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+  ASTContext &AST = SemaRef.getASTContext();
+  QualType UIntTy = AST.UnsignedIntTy;
+
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .callBuiltin("__builtin_hlsl_interlocked_or", QualType(), PH::Handle,
+                   PH::_0, PH::_1)
+      .finalize();
+
+  return BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("original_value", UIntTy, HLSLParamModifierAttr::Keyword_out)
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret", UIntTy, PH::Handle,
+                   PH::_0, PH::_1, PH::_2)
+      .finalize();
+}
+
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addInterlocked64MethodsForBuffer() {
+  ASTContext &AST = SemaRef.getASTContext();
+  VersionTuple TargetVersion = AST.getTargetInfo().getTriple().getOSVersion();
+  bool IsDXIL = AST.getTargetInfo().getTriple().getArch() == 
llvm::Triple::dxil;
+  if (TargetVersion < VersionTuple(6, 6) && IsDXIL)
+    return *this;
+
+  using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+  QualType UIntTy = AST.UnsignedIntTy;
+  QualType ULongTy = AST.UnsignedLongTy;
+
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", ULongTy, HLSLParamModifierAttr::Keyword_in)
+      .callBuiltin("__builtin_hlsl_interlocked_or64", QualType(), PH::Handle,
+                   PH::_0, PH::_1)
+      .finalize();
+
+  return BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", ULongTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("original_value", ULongTy, HLSLParamModifierAttr::Keyword_out)
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret64", ULongTy, PH::Handle,
+                   PH::_0, PH::_1, PH::_2)
+      .finalize();
+}
+
 } // namespace hlsl
 } // namespace clang
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h 
b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
index aa6967e1eb725..29d5eff91a0bc 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
@@ -109,6 +109,8 @@ class BuiltinTypeDeclBuilder {
   BuiltinTypeDeclBuilder &addConsumeMethod();
 
   BuiltinTypeDeclBuilder &addGetDimensionsMethodForBuffer();
+  BuiltinTypeDeclBuilder &addInterlockedMethodsForBuffer();
+  BuiltinTypeDeclBuilder &addInterlocked64MethodsForBuffer();
 
 private:
   BuiltinTypeDeclBuilder &addCreateFromBinding();
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp 
b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index f7862b3a3f594..8836e2f0ab2cb 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -515,6 +515,8 @@ void 
HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
         .addByteAddressBufferLoadMethods()
         .addByteAddressBufferStoreMethods()
         .addGetDimensionsMethodForBuffer()
+        .addInterlockedMethodsForBuffer()
+        .addInterlocked64MethodsForBuffer()
         .completeDefinition();
   });
   Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace,
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 3e99a8f7d89d1..c3ed6a59e80c0 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3325,6 +3325,21 @@ static bool CheckVectorElementCount(Sema *S, QualType 
PassedType,
   return false;
 }
 
+static bool CheckShaderModelVersion(Sema *S, CallExpr *TheCall,
+                                    VersionTuple MinimumSMVersion) {
+  bool IsDXIL = S->getASTContext().getTargetInfo().getTriple().getArch() ==
+                llvm::Triple::dxil;
+  llvm::VersionTuple SMVersion =
+      S->getASTContext().getTargetInfo().getTriple().getOSVersion();
+  if (SMVersion < MinimumSMVersion && IsDXIL) {
+    S->Diag(TheCall->getBeginLoc(),
+            diag::err_hlsl_intrinsic_in_wrong_shader_model)
+        << TheCall << MinimumSMVersion.getAsString();
+    return true;
+  }
+  return false;
+}
+
 // Note: returning true in this case results in CheckBuiltinFunctionCall
 // returning an ExprError
 bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) 
{
@@ -3924,6 +3939,170 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
                                getASTContext().UnsignedIntTy);
     break;
   }
+  case Builtin::BI__builtin_hlsl_interlocked_or: {
+    if (SemaRef.checkArgCountRange(TheCall, 3, 4))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->isRaw() && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->isRaw() && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.IntTy ||
+                  ResTy->getContainedType() == AST.UnsignedIntTy);
+      // RWTexture<int> or RWTexture<uint> (any dimension)
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->isRaw() &&
+                 ResTy->getAttrs().ResourceDimension !=
+                     llvm::dxil::ResourceDimension::Unknown &&
+                 (ResTy->getContainedType() == AST.IntTy ||
+                  ResTy->getContainedType() == AST.UnsignedIntTy);
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                            SemaRef.getASTContext().UnsignedIntTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 4)
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                              SemaRef.getASTContext().UnsignedIntTy))
+        return true;
+
+    TheCall->setType(SemaRef.getASTContext().VoidTy);
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
+    if (SemaRef.checkArgCountRange(TheCall, 4, 5))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.IntTy ||
+                  ResTy->getContainedType() == AST.UnsignedIntTy);
+      // TODO: Handle Texture types when implemented
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                            SemaRef.getASTContext().UnsignedIntTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 5)
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(4),
+                              SemaRef.getASTContext().UnsignedIntTy))
+        return true;
+
+    TheCall->setType(SemaRef.getASTContext().UnsignedIntTy);
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or64: {
+    if (SemaRef.checkArgCountRange(TheCall, 3, 4))
+      return true;
+    if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.LongTy ||
+                  ResTy->getContainedType() == AST.UnsignedLongTy);
+      // TODO: Handle Texture types when implemented
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 4) {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedIntTy) ||
+          CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    } else {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    }
+
+    TheCall->setType(SemaRef.getASTContext().VoidTy);
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
+    if (SemaRef.checkArgCountRange(TheCall, 4, 5))
+      return true;
+    if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.LongTy ||
+                  ResTy->getContainedType() == AST.UnsignedLongTy);
+      // TODO: Handle Texture types when implemented
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                            SemaRef.getASTContext().UnsignedLongTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 5) {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedIntTy) ||
+          CheckArgTypeMatches(&SemaRef, TheCall->getArg(4),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    } else {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    }
+
+    TheCall->setType(SemaRef.getASTContext().UnsignedLongTy);
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl 
b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
new file mode 100644
index 0000000000000..fa2a75f2309e9
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple 
dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DBYTEADDRESS | FileCheck %s 
--check-prefixes=CHECK-BYTEADDRESS
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple 
dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTBUF | FileCheck %s 
--check-prefixes=CHECK-INTBUF
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple 
dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DUINTBUF | FileCheck %s 
--check-prefixes=CHECK-UINTBUF
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple 
dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DSTRUCTURED | FileCheck %s 
--check-prefixes=CHECK-STRUCTURED
+
+#ifdef BYTEADDRESS
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] 
[[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
+#endif
+#ifdef INTBUF
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] 
[[hlsl::contained_type(int)]];
+#endif
+#ifdef UINTBUF
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] 
[[hlsl::contained_type(unsigned int)]];
+#endif
+#ifdef STRUCTURED
+struct TestStruct {
+  int a;
+  unsigned int b;
+};
+
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] 
[[hlsl::raw_buffer]] [[hlsl::contained_type(TestStruct)]];
+#endif
+
+struct CustomResource {
+  handle_t h;
+};
+
+#ifndef STRUCTURED
+
+// CHECK-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-NEXT: store i32 %hlsl.interlocked.or, ptr [[returnVal:%.*]], align 4
+// CHECK-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr [[returnVal]], align 4
+// CHECK-NEXT: ret i32 [[loadedReturnVal]]
+unsigned int test_return(CustomResource cr) {
+  unsigned int returnVal = 0u;
+  __builtin_hlsl_interlocked_or_ret(cr.h, 1u, 0u, returnVal);
+  return returnVal;
+}
+
+// CHECK-LABEL: define {{.*}} void @_Z14test_no_return14CustomResource(
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-NEXT: ret void
+void test_no_return(CustomResource h) {
+  __builtin_hlsl_interlocked_or(h.h, 1u, 0u);
+}
+
+#else
+
+// CHECK-STRUCTURED-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED-NEXT: store i32 %hlsl.interlocked.or, ptr 
[[returnVal:%.*]], align 4
+// CHECK-STRUCTURED-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr 
[[returnVal]], align 4
+// CHECK-STRUCTURED-NEXT: ret i32 [[loadedReturnVal]]
+unsigned int test_return(CustomResource cr) {
+  unsigned int returnVal = 0u;
+  __builtin_hlsl_interlocked_or_ret(cr.h, 1u, 4u, 0u, returnVal);
+  return returnVal;
+}
+
+// CHECK-STRUCTURED-LABEL: define {{.*}} void 
@_Z14test_no_return14CustomResource(
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED-NEXT: ret void
+void test_no_return(CustomResource h) {
+  __builtin_hlsl_interlocked_or(h.h, 1u, 4u, 0u);
+}
+
+#endif
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl 
b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
new file mode 100644
index 0000000000000..85caf9aeb4120
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple 
dxil-pc-shadermodel6.0-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTERLOCKED32 | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-32
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple 
dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTERLOCKED64 | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-64
+
+RWByteAddressBuffer buf: register(u0);
+
+// CHECK: %"class.hlsl::RWByteAddressBuffer" = type { target("dx.RawBuffer", 
i8, 1, 0) }
+
+#ifdef INTERLOCKED32
+
+// CHECK-32-LABEL: define {{.*}} @_Z11test_return
+// CHECK-32: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj
+// CHECK-32: ret i32 {{%.*}}
+uint test_return() {
+  uint returnVal;
+  buf.InterlockedOr(0, 0, returnVal);
+  return returnVal;
+}
+
+// CHECK-32-LABEL: define {{.*}} 
@_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj(
+// CHECK-32: [[this_addr:%.*]] = alloca ptr
+// CHECK-32: [[original_val:%.*]] = alloca ptr
+// CHECK-32: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-32: [[handle:%.*]] = getelementptr inbounds nuw 
%"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
+// CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK-32: [[result:%.*]] = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK-32: store i32 [[result]], ptr [[loaded_orig_val_ptr]]
+
+// CHECK-32-LABEL: define {{.*}} @_Z14test_no_return
+// CHECK-32: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj
+// CHECK-32: ret void
+void test_no_return() {
+  buf.InterlockedOr(0, 0);
+}
+
+// CHECK-32-LABEL: define {{.*}} void 
@_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj(
+// CHECK-32: [[this_addr:%.*]] = alloca ptr
+// CHECK-32: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-32: [[handle:%.*]] = getelementptr inbounds nuw 
%"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
+// CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK-32: {{%.*}} = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: ret void
+
+// CHECK-32: declare i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0), i32, i32, i32, i32, i32)
+
+#endif
+
+#ifdef INTERLOCKED64
+
+// CHECK-LABEL: define {{.*}} @_Z13test_return64
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjmRm
+// CHECK: ret i64 {{%.*}}
+uint64_t test_return64() {
+  uint64_t returnVal;
+  buf.InterlockedOr64(0, 0, returnVal);
+  return returnVal;
+}
+
+// CHECK-64-LABEL: define {{.*}} void 
@_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjmRm(
+// CHECK-64: [[this_addr:%.*]] = alloca ptr
+// CHECK-64: [[original_val:%.*]] = alloca ptr
+// CHECK-64: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-64: [[handle:%.*]] = getelementptr inbounds nuw 
%"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
+// CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK-64: [[result:%.*]] = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK-64: store i64 [[result]], ptr [[loaded_orig_val_ptr]]
+
+// CHECK-LABEL: define {{.*}} @_Z16test_no_return64
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejm
+// CHECK: ret void
+void test_no_return64() {
+  buf.InterlockedOr64(0, 0);
+}
+
+// CHECK-64-LABEL: define {{.*}} void 
@_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejm(
+// CHECK-64: [[this_addr:%.*]] = alloca ptr
+// CHECK-64: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-64: [[handle:%.*]] = getelementptr inbounds nuw 
%"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
+// CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK-64: {{.*}} = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: ret void
+
+// CHECK-64: declare i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0), i32, i32, i32, i32, i64)
+
+#endif
diff --git a/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl 
b/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
new file mode 100644
index 0000000000000..2c9da9b840297
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library %s -verify
+
+void no_arg() {
+  __builtin_hlsl_interlocked_or();
+  // expected-error@-1 {{too few arguments to function call, expected 3, have 
0}}
+}
+
+void too_many_args() {
+  __builtin_hlsl_interlocked_or(0, 0, 0, 0, 0);
+  // expected-error@-1 {{too many arguments to function call, expected at most 
4, have 5}}
+}
+
+void non_resource_arg() {
+  __builtin_hlsl_interlocked_or(0, 0, 0);
+  // expected-error@-1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+void ret_no_arg() {
+  __builtin_hlsl_interlocked_or_ret();
+  // expected-error@-1 {{too few arguments to function call, expected 4, have 
0}}
+}
+
+void ret_too_many_args() {
+  __builtin_hlsl_interlocked_or_ret(0, 0, 0, 0, 0, 0);
+  // expected-error@-1 {{too many arguments to function call, expected at most 
5, have 6}}
+}
+
+void ret_non_resource_arg() {
+  __builtin_hlsl_interlocked_or_ret(0, 0, 0, 0);
+  // expected-error@-1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+// ByteAddressBuffer
+using handle_char_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] 
[[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
+// Buffer<int>
+using handle_int_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] 
[[hlsl::contained_type(int)]];
+// RWBuffer<float>
+using handle_float_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] 
[[hlsl::contained_type(float)]];
+
+struct CustomResource {
+  handle_char_t ByteAddressBufferChar;
+  handle_int_t BufferInt;
+  handle_float_t RWBufferFloat;
+};
+
+void invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or(CR.ByteAddressBufferChar, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or(CR.BufferInt, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or(CR.RWBufferFloat, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret(CR.ByteAddressBufferChar, 0, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret(CR.BufferInt, 0, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret(CR.RWBufferFloat, 0, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void wrong_shader_model() {
+  __builtin_hlsl_interlocked_or64(0, 0, 0, 0);
+  // expected-error@-1 {{intrinsic '__builtin_hlsl_interlocked_or64(0, 0, 0, 
0)' requires shader model 6.6 or greater}}
+}
+
+void ret_wrong_shader_model() {
+  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0);
+  // expected-error@-1 {{intrinsic '__builtin_hlsl_interlocked_or_ret64(0, 0, 
0, 0)' requires shader model 6.6 or greater}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl 
b/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
new file mode 100644
index 0000000000000..5a2a0ea2b23af
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -verify
+
+void no_arg() {
+  __builtin_hlsl_interlocked_or64();
+  // expected-error@-1 {{too few arguments to function call, expected 3, have 
0}}
+}
+
+void too_many_args() {
+  __builtin_hlsl_interlocked_or64(0, 0, 0, 0, 0);
+  // expected-error@-1 {{too many arguments to function call, expected at most 
4, have 5}}
+}
+
+void non_resource_arg() {
+  __builtin_hlsl_interlocked_or64(0, 0, 0);
+  // expected-error@-1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+void ret_no_arg() {
+  __builtin_hlsl_interlocked_or_ret64();
+  // expected-error@-1 {{too few arguments to function call, expected 4, have 
0}}
+}
+
+void ret_too_many_args() {
+  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0, 0, 0);
+  // expected-error@-1 {{too many arguments to function call, expected at most 
5, have 6}}
+}
+
+void ret_non_resource_arg() {
+  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0);
+  // expected-error@-1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+// ByteAddressBuffer
+using handle_char_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] 
[[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
+// Buffer<int>
+using handle_int_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] 
[[hlsl::contained_type(int)]];
+// RWBuffer<float>
+using handle_float_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] 
[[hlsl::contained_type(float)]];
+
+struct CustomResource {
+  handle_char_t ByteAddressBufferChar;
+  handle_int_t BufferInt;
+  handle_float_t RWBufferFloat;
+};
+
+void invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or64(CR.ByteAddressBufferChar, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or64(CR.BufferInt, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or64(CR.RWBufferFloat, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret64(CR.ByteAddressBufferChar, 0, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret64(CR.BufferInt, 0, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret64(CR.RWBufferFloat, 0, 0, 0);
+  // expected-error@-1 {{invalid __hlsl_resource_t type attributes}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td 
b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 88732bfa5a892..27b9aafed5218 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -94,6 +94,9 @@ def int_dx_resource_sample_clamp
 // Cast between target extension handle types and dxil-style opaque handles
 def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
 
+def int_dx_resource_atomicbinop: DefaultAttrsIntrinsic<[llvm_i32_ty], 
[llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], 
[IntrWriteMem]>;
+def int_dx_resource_atomicbinop64: DefaultAttrsIntrinsic<[llvm_i64_ty], 
[llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], 
[IntrWriteMem]>;
+
 def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], 
[IntrNoMem]>;
 def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], 
[IntrNoMem]>;
 def int_dx_asdouble : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, 
llvm_double_ty>], [llvm_anyint_ty, LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 8751484496395..a88bade555517 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -910,6 +910,14 @@ def GetDimensions : DXILOp<72, getDimensions> {
   let stages = [Stages<DXIL1_0, [all_stages]>];
 }
 
+def AtomicBinOp : DXILOp<78, atomicBinOp> {
+  let Doc = "performs an atomic operation on a value in memory";
+  let arguments = [HandleTy, Int32Ty, Int32Ty, Int32Ty, Int32Ty, OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [Int32Ty, Int64Ty]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
   let intrinsics = [
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp 
b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c0830cc92aa7..55117340d75da 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -920,6 +920,60 @@ class OpLowerer {
     });
   }
 
+  [[nodiscard]] bool lowerAtomicBinOp(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    Type *RetTy = IRB.getInt32Ty();
+
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+      Value *Handle =
+          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+      Value *OperationMode = CI->getArgOperand(1);
+      Value *Index1 = CI->getArgOperand(2);
+      Value *Index2 = CI->getArgOperand(3);
+      Value *Index3 = CI->getArgOperand(4);
+      Value *NewVal = CI->getArgOperand(5);
+      SmallVector<Value *> Args{Handle, OperationMode, Index1,
+                                Index2, Index3,        NewVal};
+
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
+      if (Error E = OpCall.takeError())
+        return E;
+
+      CI->replaceAllUsesWith(*OpCall);
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
+  [[nodiscard]] bool lowerAtomicBinOp64(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    Type *RetTy = IRB.getInt64Ty();
+
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+      Value *Handle =
+          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+      Value *OperationMode = CI->getArgOperand(1);
+      Value *Index1 = CI->getArgOperand(2);
+      Value *Index2 = CI->getArgOperand(3);
+      Value *Index3 = CI->getArgOperand(4);
+      Value *NewVal = CI->getArgOperand(5);
+      SmallVector<Value *> Args{Handle, OperationMode, Index1,
+                                Index2, Index3,        NewVal};
+
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
+      if (Error E = OpCall.takeError())
+        return E;
+
+      CI->replaceAllUsesWith(*OpCall);
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
   bool lowerIntrinsics() {
     bool Updated = false;
     bool HasErrors = false;
@@ -1006,6 +1060,12 @@ class OpLowerer {
       case Intrinsic::is_fpclass:
         HasErrors |= lowerIsFPClass(F);
         break;
+      case Intrinsic::dx_resource_atomicbinop:
+        HasErrors |= lowerAtomicBinOp(F);
+        break;
+      case Intrinsic::dx_resource_atomicbinop64:
+        HasErrors |= lowerAtomicBinOp64(F);
+        break;
       }
       Updated = true;
     }
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or.ll 
b/llvm/test/CodeGen/DirectX/interlocked-or.ll
new file mode 100644
index 0000000000000..f53a0a296ebda
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/interlocked-or.ll
@@ -0,0 +1,117 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define i32 @_Z20byteaddr_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z23byteaddr_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
+
+%struct.TestStruct = type { i32, i32 }
+
+define i32 @_Z18struct_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z21struct_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i32 @_Z21typed_int_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z24typed_int_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i32 @_Z22typed_uint_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z25typed_uint_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or64.ll 
b/llvm/test/CodeGen/DirectX/interlocked-or64.ll
new file mode 100644
index 0000000000000..0aaf170c8eea1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/interlocked-or64.ll
@@ -0,0 +1,117 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define i64 @_Z20byteaddr_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z23byteaddr_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+%struct.TestStruct = type { i64, i64 }
+
+define i64 @_Z18struct_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z21struct_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z21typed_int_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer",
 i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z24typed_int_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer",
 i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z22typed_uint_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer",
 i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z25typed_uint_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer",
 i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}

>From b13e91b3e6b71f64c5651a34397f762e7622d019 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <[email protected]>
Date: Wed, 25 Feb 2026 13:05:41 +0000
Subject: [PATCH 2/2] Update from review feedback

Moves some of the interlocked/atomicBinOp logic into the backend and DXIL.td.
With the removal of the 64-bit variant of interlocked_or for lowering purposes,
the backend tests are condensed into one file.
Replaces frontend Undef values with Poison, with the intention of lowering them
to Undef in the DXIL backend. The Poison -> Undef replacement is NOT yet done in
the backend.
---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |  54 +++----
 .../builtins/Interlocked-or-builtin.hlsl      |  16 +-
 .../CodeGenHLSL/builtins/Interlocked-or.hlsl  |  12 +-
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   3 +-
 llvm/lib/Target/DirectX/DXIL.td               |  25 ++++
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    |  64 +-------
 llvm/test/CodeGen/DirectX/interlocked-or.ll   | 140 +++++++++++++++++-
 llvm/test/CodeGen/DirectX/interlocked-or64.ll | 117 ---------------
 8 files changed, 193 insertions(+), 238 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/interlocked-or64.ll

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp 
b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 39d716bea91bf..ba96b087e59f6 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -300,23 +300,8 @@ static Value *handleElementwiseF32ToF16(CodeGenFunction 
&CGF,
   llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
 }
 
-// Not sure where would be best for this to live
-// AtomicBinOp uses an i32 to determine the operation mode as follows
-enum AtomicOperationCode : uint {
-  Add = 0,
-  And = 1,
-  Or = 2,
-  Xor = 3,
-  IMin = 4,
-  IMax = 5,
-  UMin = 6,
-  UMax = 7,
-  Exchange = 8
-};
-
-static Value *handleAtomicBinOp(CodeGenFunction &CGF, const CallExpr *E,
-                                const AtomicOperationCode OpCode,
-                                const bool HasReturn, const bool Is32Bit) {
+static Value *handleInterlockedOr(CodeGenFunction &CGF, const CallExpr *E,
+                                  const bool HasReturn, const bool Is32Bit) {
   Value *HandleOp = CGF.EmitScalarExpr(E->getArg(0));
   Value *IndexOp = CGF.EmitScalarExpr(E->getArg(1));
   Value *StructuredBufIndexOp;
@@ -349,18 +334,13 @@ static Value *handleAtomicBinOp(CodeGenFunction &CGF, 
const CallExpr *E,
     const HLSLAttributedResourceType *ResourceTy =
         HandleTy->getAs<HLSLAttributedResourceType>();
 
-    // AtomicBinOp uses an i32 to determine the operation mode as follows
-    // Add: 0, And: 1, Or: 2, Xor: 3, IMin: 4, IMax: 5, UMin: 6, UMax: 7,
-    // Exchange: 8
-    Value *ModeConstant = ConstantInt::get(CGF.Int32Ty, OpCode);
-
     // AtomicBinOp has 3 coordinate params which must be handled differently
     // depending on the resource type being accessed.
     // Initially undef all the coordinates then fill as required
-    Value *Undef = UndefValue::get(CGF.Int32Ty);
-    Value *C0 = Undef;
-    Value *C1 = Undef;
-    Value *C2 = Undef;
+    Value *Poison = PoisonValue::get(CGF.Int32Ty);
+    Value *C0 = Poison;
+    Value *C1 = Poison;
+    Value *C2 = Poison;
     if (!ResourceTy->getAttrs().RawBuffer) {
       assert(
           (ResourceTy->getContainedType() == CGF.getContext().IntTy ||
@@ -381,23 +361,23 @@ static Value *handleAtomicBinOp(CodeGenFunction &CGF, 
const CallExpr *E,
       C0 = IndexOp;
       C1 = StructuredBufIndexOp;
     }
-    assert(C0 != Undef && "Failed to identify coordinates for Interlocked");
+    assert(C0 != Poison && "Failed to identify coordinates for Interlocked");
     // TODO: Add coordinate logic for texture and groupshared
 
     // atomicBinOp
     // opcode, handle, binary operation code, coordinates c0, c1, c2, new val
     if (Is32Bit) {
-      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop;
+      Intrinsic::ID ID = Intrinsic::dx_interlocked_or;
       OldValueOp = CGF.Builder.CreateIntrinsic(
           /*ReturnType=*/CGF.Int32Ty, ID,
-          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
-          nullptr, "hlsl.interlocked.or");
+          ArrayRef<Value *>{HandleOp, C0, C1, C2, NewValueOp}, nullptr,
+          "hlsl.interlocked.or");
     } else {
-      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop64;
+      Intrinsic::ID ID = Intrinsic::dx_interlocked_or;
       OldValueOp = CGF.Builder.CreateIntrinsic(
           /*ReturnType=*/CGF.Int64Ty, ID,
-          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
-          nullptr, "hlsl.interlocked.or");
+          ArrayRef<Value *>{HandleOp, C0, C1, C2, NewValueOp}, nullptr,
+          "hlsl.interlocked.or");
     }
     break;
   }
@@ -1298,16 +1278,16 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     return Builder.CreateCall(SpecConstantFn, Args);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, true);
+    return handleInterlockedOr(*this, E, false, true);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or64: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, false);
+    return handleInterlockedOr(*this, E, false, false);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, true);
+    return handleInterlockedOr(*this, E, true, true);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, false);
+    return handleInterlockedOr(*this, E, true, false);
   }
   }
   return nullptr;
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl 
b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
index fa2a75f2309e9..b43666c89a45f 100644
--- a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
@@ -32,9 +32,9 @@ struct CustomResource {
 #ifndef STRUCTURED
 
 // CHECK-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
-// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-INTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", 
i8, 1, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-NEXT: store i32 %hlsl.interlocked.or, ptr [[returnVal:%.*]], align 4
 // CHECK-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr [[returnVal]], align 4
 // CHECK-NEXT: ret i32 [[loadedReturnVal]]
@@ -45,9 +45,9 @@ unsigned int test_return(CustomResource cr) {
 }
 
 // CHECK-LABEL: define {{.*}} void @_Z14test_no_return14CustomResource(
-// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-INTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", 
i8, 1, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-NEXT: ret void
 void test_no_return(CustomResource h) {
   __builtin_hlsl_interlocked_or(h.h, 1u, 0u);
@@ -56,7 +56,7 @@ void test_no_return(CustomResource h) {
 #else
 
 // CHECK-STRUCTURED-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
-// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %0, i32 1, i32 4, i32 poison, i32 0)
 // CHECK-STRUCTURED-NEXT: store i32 %hlsl.interlocked.or, ptr 
[[returnVal:%.*]], align 4
 // CHECK-STRUCTURED-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr 
[[returnVal]], align 4
 // CHECK-STRUCTURED-NEXT: ret i32 [[loadedReturnVal]]
@@ -67,7 +67,7 @@ unsigned int test_return(CustomResource cr) {
 }
 
 // CHECK-STRUCTURED-LABEL: define {{.*}} void 
@_Z14test_no_return14CustomResource(
-// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %0, i32 1, i32 4, i32 poison, i32 0)
 // CHECK-STRUCTURED-NEXT: ret void
 void test_no_return(CustomResource h) {
   __builtin_hlsl_interlocked_or(h.h, 1u, 4u, 0u);
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl 
b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
index 85caf9aeb4120..b6611ff0800ae 100644
--- a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
@@ -28,7 +28,7 @@ uint test_return() {
 // CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
 // CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
-// CHECK-32: [[result:%.*]] = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: [[result:%.*]] = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
 // CHECK-32: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
 // CHECK-32: store i32 [[result]], ptr [[loaded_orig_val_ptr]]
 
@@ -46,10 +46,10 @@ void test_no_return() {
 // CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
 // CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
-// CHECK-32: {{%.*}} = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: {{%.*}} = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
 // CHECK-32: ret void
 
-// CHECK-32: declare i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0), i32, i32, i32, i32, i32)
+// CHECK-32: declare i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", 
i8, 1, 0), i32, i32, i32, i32)
 
 #endif
 
@@ -72,7 +72,7 @@ uint64_t test_return64() {
 // CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
 // CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
-// CHECK-64: [[result:%.*]] = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: [[result:%.*]] = call i64 
@llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
 // CHECK-64: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
 // CHECK-64: store i64 [[result]], ptr [[loaded_orig_val_ptr]]
 
@@ -90,9 +90,9 @@ void test_no_return64() {
 // CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr 
[[handle]]
 // CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
-// CHECK-64: {{.*}} = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: {{.*}} = call i64 
@llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", 
i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
 // CHECK-64: ret void
 
-// CHECK-64: declare i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0), i32, i32, i32, i32, i64)
+// CHECK-64: declare i64 
@llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", 
i8, 1, 0), i32, i32, i32, i64)
 
 #endif
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td 
b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 27b9aafed5218..2e4dbf2886c2e 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -94,8 +94,7 @@ def int_dx_resource_sample_clamp
 // Cast between target extension handle types and dxil-style opaque handles
 def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
 
-def int_dx_resource_atomicbinop: DefaultAttrsIntrinsic<[llvm_i32_ty], 
[llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], 
[IntrWriteMem]>;
-def int_dx_resource_atomicbinop64: DefaultAttrsIntrinsic<[llvm_i64_ty], 
[llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], 
[IntrWriteMem]>;
+def int_dx_interlocked_or: DefaultAttrsIntrinsic<[llvm_anyint_ty], 
[llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], 
[IntrWriteMem]>;
 
 def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], 
[IntrNoMem]>;
 def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], 
[IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index a88bade555517..e3d8037e33853 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -317,12 +317,23 @@ defvar WaveOpKind_Product = 1;
 defvar WaveOpKind_Min = 2;
 defvar WaveOpKind_Max = 3;
 
+defvar AtomicBinOp_Add = 0;
+defvar AtomicBinOp_And = 1;
+defvar AtomicBinOp_Or = 2;
+defvar AtomicBinOp_Xor = 3;
+defvar AtomicBinOp_IMin = 4;
+defvar AtomicBinOp_IMax = 5;
+defvar AtomicBinOp_UMin = 6;
+defvar AtomicBinOp_UMax = 7;
+defvar AtomicBinOp_Exchange = 8;
+
 defvar SignedOpKind_Signed = 0;
 defvar SignedOpKind_Unsigned = 1;
 
 // Intrinsic arg selection
 class IntrinArgSelectType;
 def IntrinArgSelect_Index : IntrinArgSelectType;
+def IntrinArgSelect_HandleIndex : IntrinArgSelectType;
 def IntrinArgSelect_I8 : IntrinArgSelectType;
 def IntrinArgSelect_I32 : IntrinArgSelectType;
 
@@ -332,6 +343,7 @@ class IntrinArgSelect<IntrinArgSelectType type_, int 
value_> {
 }
 
 class IntrinArgIndex<int index> : IntrinArgSelect<IntrinArgSelect_Index, 
index>;
+class IntrinArgHandleIndex<int index> : 
IntrinArgSelect<IntrinArgSelect_HandleIndex, index>;
 class IntrinArgI8<int value> : IntrinArgSelect<IntrinArgSelect_I8, value>;
 class IntrinArgI32<int value> : IntrinArgSelect<IntrinArgSelect_I32, value>;
 
@@ -351,6 +363,14 @@ class IntrinArgI32<int value> : 
IntrinArgSelect<IntrinArgSelect_I32, value>;
 //     >,
 //   ]
 
//=========================================================================================
+// Using IntrinArgHandleIndex<>, handle arguments of the intrinsic can be 
copied to the
+// DXIL OP with required casting in specific order:
+//   let intrinsics = [
+//     IntrinSelect<int_dx_my_intrinsic,
+//       [IntrinArgHandleIndex<2>, IntrinArgHandleIndex<1>, IntrinArgHandleIndex<0>]
+//     >,
+//   ]
+//=========================================================================================
 // Using IntrinArgI8<> and IntrinArgI32<>, integer constants can be added
 // directly to the dxil op. This can be used in conjunction with
 // IntrinArgIndex:
@@ -912,6 +932,11 @@ def GetDimensions : DXILOp<72, getDimensions> {
 
 def AtomicBinOp : DXILOp<78, atomicBinOp> {
   let Doc = "performs an atomic operation on a value in memory";
+  let intrinsics = [
+    IntrinSelect<int_dx_interlocked_or,
+    [ IntrinArgHandleIndex<0>, IntrinArgI32<AtomicBinOp_Or>, IntrinArgIndex<1>,
+      IntrinArgIndex<2>, IntrinArgIndex<3>, IntrinArgIndex<4> ]>
+  ];
   let arguments = [HandleTy, Int32Ty, Int32Ty, Int32Ty, Int32Ty, OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty, Int64Ty]>];
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp 
b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 55117340d75da..d6f4fbebccd51 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -119,6 +119,10 @@ class OpLowerer {
           case IntrinArgSelect::Type::Index:
             Args.push_back(CI->getArgOperand(A.Value));
             break;
+          case IntrinArgSelect::Type::HandleIndex:
+            Args.push_back(createTmpHandleCast(CI->getArgOperand(A.Value),
+                                               OpBuilder.getHandleType()));
+            break;
           case IntrinArgSelect::Type::I8:
             Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
             break;
@@ -920,60 +924,6 @@ class OpLowerer {
     });
   }
 
-  [[nodiscard]] bool lowerAtomicBinOp(Function &F) {
-    IRBuilder<> &IRB = OpBuilder.getIRB();
-    Type *RetTy = IRB.getInt32Ty();
-
-    return replaceFunction(F, [&](CallInst *CI) -> Error {
-      IRB.SetInsertPoint(CI);
-      Value *Handle =
-          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
-      Value *OperationMode = CI->getArgOperand(1);
-      Value *Index1 = CI->getArgOperand(2);
-      Value *Index2 = CI->getArgOperand(3);
-      Value *Index3 = CI->getArgOperand(4);
-      Value *NewVal = CI->getArgOperand(5);
-      SmallVector<Value *> Args{Handle, OperationMode, Index1,
-                                Index2, Index3,        NewVal};
-
-      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
-          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
-      if (Error E = OpCall.takeError())
-        return E;
-
-      CI->replaceAllUsesWith(*OpCall);
-      CI->eraseFromParent();
-      return Error::success();
-    });
-  }
-
-  [[nodiscard]] bool lowerAtomicBinOp64(Function &F) {
-    IRBuilder<> &IRB = OpBuilder.getIRB();
-    Type *RetTy = IRB.getInt64Ty();
-
-    return replaceFunction(F, [&](CallInst *CI) -> Error {
-      IRB.SetInsertPoint(CI);
-      Value *Handle =
-          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
-      Value *OperationMode = CI->getArgOperand(1);
-      Value *Index1 = CI->getArgOperand(2);
-      Value *Index2 = CI->getArgOperand(3);
-      Value *Index3 = CI->getArgOperand(4);
-      Value *NewVal = CI->getArgOperand(5);
-      SmallVector<Value *> Args{Handle, OperationMode, Index1,
-                                Index2, Index3,        NewVal};
-
-      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
-          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
-      if (Error E = OpCall.takeError())
-        return E;
-
-      CI->replaceAllUsesWith(*OpCall);
-      CI->eraseFromParent();
-      return Error::success();
-    });
-  }
-
   bool lowerIntrinsics() {
     bool Updated = false;
     bool HasErrors = false;
@@ -1060,12 +1010,6 @@ class OpLowerer {
       case Intrinsic::is_fpclass:
         HasErrors |= lowerIsFPClass(F);
         break;
-      case Intrinsic::dx_resource_atomicbinop:
-        HasErrors |= lowerAtomicBinOp(F);
-        break;
-      case Intrinsic::dx_resource_atomicbinop64:
-        HasErrors |= lowerAtomicBinOp64(F);
-        break;
       }
       Updated = true;
     }
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or.ll 
b/llvm/test/CodeGen/DirectX/interlocked-or.ll
index f53a0a296ebda..ef03d7c67b758 100644
--- a/llvm/test/CodeGen/DirectX/interlocked-or.ll
+++ b/llvm/test/CodeGen/DirectX/interlocked-or.ll
@@ -10,7 +10,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = tail call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -25,7 +25,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 
1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", 
i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
@@ -40,7 +40,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 1, i32 4, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -55,7 +55,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer,  i32 1, i32 4, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
@@ -68,7 +68,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 1) %buffer,  i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -83,7 +83,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer",
 i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
@@ -96,7 +96,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -111,7 +111,131 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
-  %hlsl.interlocked.or = call i32 
@llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer",
 i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
+
+define i64 @_Z22byteaddr_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z25byteaddr_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+%struct.TestStruct64 = type { i64, i64 }
+
+define i64 @_Z20struct_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct64, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(i32
 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 poison, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t.i64(target("dx.RawBuffer", %struct.TestStruct64, 1, 0) %buffer, i32 1, i32 8, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z23struct_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct64, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(i32
 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 poison, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t.i64(target("dx.RawBuffer", %struct.TestStruct64, 1, 0) %buffer, i32 1, i32 8, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z23typed_int_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t.i64(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z26typed_int_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t.i64(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z24typed_uint_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t.i64(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z27typed_uint_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, 
i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 
@llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", 
i64, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+declare i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", 
i8, 1, 0) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+declare i32 
@llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+declare i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 1) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+declare i32 
@llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer",
 i32, 1, 0, 0) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+
+declare i64 
@llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", 
i8, 1, 0) %0, i32 %1, i32 %2, i32 %3, i64 %4)
+declare i64 
@llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t.i64(target("dx.RawBuffer",
 %struct.TestStruct64, 1, 0) %0, i32 %1, i32 %2, i32 %3, i64 %4)
+declare i64 
@llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t.i64(target("dx.TypedBuffer",
 i64, 1, 0, 1) %0, i32 %1, i32 %2, i32 %3, i64 %4)
+declare i64 
@llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t.i64(target("dx.TypedBuffer",
 i64, 1, 0, 0) %0, i32 %1, i32 %2, i32 %3, i64 %4)
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or64.ll 
b/llvm/test/CodeGen/DirectX/interlocked-or64.ll
deleted file mode 100644
index 0aaf170c8eea1..0000000000000
--- a/llvm/test/CodeGen/DirectX/interlocked-or64.ll
+++ /dev/null
@@ -1,117 +0,0 @@
-; RUN: opt -S -dxil-op-lower %s | FileCheck %s
-
-target triple = "dxil-pc-shadermodel6.6-compute"
-
-define i64 @_Z20byteaddr_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z23byteaddr_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", i8, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, 
i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", 
i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}
-
-%struct.TestStruct = type { i64, i64 }
-
-define i64 @_Z18struct_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z21struct_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 
0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer",
 %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}
-
-define i64 @_Z21typed_int_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer",
 i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z24typed_int_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer",
 i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}
-
-define i64 @_Z22typed_uint_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer",
 i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z25typed_uint_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) 
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 
1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, 
%dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 
@llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer",
 i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to