[clang] [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (PR #109415)

2024-09-25 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/109415

>From 75ca598c7e8a583545f50ee2c526556df261cc7f Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Fri, 20 Sep 2024 13:25:49 +0100
Subject: [PATCH 1/2] Implement `getGlobalVarAddressSpace` for SPIR-V; stop
 using SYCL for tests.

---
 clang/lib/Basic/Targets/SPIR.h|  5 
 clang/lib/CodeGen/Targets/SPIR.cpp| 23 
 .../CodeGenCXX/dynamic-cast-address-space.cpp | 23 
 .../test/CodeGenCXX/spirv-amdgcn-float16.cpp  | 27 ++-
 .../template-param-objects-address-space.cpp  |  4 +--
 ...w-expression-typeinfo-in-address-space.cpp |  2 +-
 .../try-catch-with-address-space.cpp  |  6 ++---
 .../typeid-cxx11-with-address-space.cpp   |  2 +-
 .../CodeGenCXX/typeid-with-address-space.cpp  |  4 +--
 .../typeinfo-with-address-space.cpp   | 14 +-
 .../vtable-assume-load-address-space.cpp  | 22 +++
 ...e-pointer-initialization-address-space.cpp |  2 +-
 clang/test/CodeGenCXX/vtt-address-space.cpp   |  2 +-
 .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl|  4 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   | 25 +++--
 15 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac5..4dbc64d27ba909 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -380,6 +380,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 PointerWidth = PointerAlign = 64;
 SizeType = TargetInfo::UnsignedLong;
 PtrDiffType = IntPtrType = TargetInfo::SignedLong;
+AddrSpaceMap = &SPIRDefIsGenMap;
 
 resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
 "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
@@ -412,6 +413,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 
   void setAuxTarget(const TargetInfo *Aux) override;
 
+  void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+TargetInfo::adjust(Diags, Opts);
+  }
+
   bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
 };
 
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp 
b/clang/lib/CodeGen/Targets/SPIR.cpp
index cc52925e2e523f..975b48afa03fcd 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -58,6 +58,8 @@ class SPIRVTargetCodeGenInfo : public 
CommonSPIRTargetCodeGenInfo {
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
   : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
+  const VarDecl *D) const override;
 };
 } // End anonymous namespace.
 
@@ -188,6 +190,27 @@ void 
SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   }
 }
 
+LangAS SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(
+CodeGenModule &CGM, const VarDecl *D) const {
+  assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+  // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
+  // the global AS we can rely on either cuda_device or sycl_global to be
+  // correct; however, since this is not a CUDA Device context, we use
+  // sycl_global to prevent confusion with the assertion.
+  LangAS DefaultGlobalAS = getLangASFromTargetAS(
+  CGM.getContext().getTargetAddressSpace(LangAS::sycl_global));
+  if (!D)
+return DefaultGlobalAS;
+
+  LangAS AddrSpace = D->getType().getAddressSpace();
+  if (AddrSpace != LangAS::Default)
+return AddrSpace;
+
+  return DefaultGlobalAS;
+}
+
 /// Construct a SPIR-V target extension type for the given OpenCL image type.
 static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef 
BaseType,
  StringRef OpenCLName,
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp 
b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 3d5e32516c7af2..b967701ca1fa96 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals all --no-generate-body-for-unused-prefixes --version 4
 // RUN: %clang_cc1 -I%S %s -triple amdgcn-amd-amdhsa -emit-llvm 
-fcxx-exceptions -fexceptions -o - | FileCheck %s
-// RUN: %clang_cc1 -I%S %s -triple spirv64-unknown-unknown -fsycl-is-device 
-emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s 
--check-prefix=WITH-NONZERO-DEFAULT-AS
+// RUN: %clang_cc1 -I%S %s -triple spirv64-amd-amdhsa -emit-llvm 
-fcxx-exceptions -fexceptions -o - | FileCheck %s 
--check-prefix=WITH-NONZERO-DEFAULT-AS
 
 struct A { virtual voi

[clang] [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (PR #109415)

2024-09-25 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> > > Are there any tests available to check this behavior?
> > 
> > 
> > The reworked tests do verify / rely on this behaviour, but I can add an 
> > individual test for both vanilla and AMDGCN flavoured SPIR-V, if that is 
> > preferred (might be better anyway).
> 
> Thanks @AlexVlx I will approve this change. You can add the tests as part of 
> this PR or a follow-up PR. Either way is fine. Sincerely

Thank you for the review; I'll punt on adding targeted tests here because I 
*think* we'll have to add a few more of these queries, and I can just bundle 
specific tests there.
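
For reference, a minimal sketch of what such a targeted test could look like (hypothetical file name; the CHECK line is indicative only and would need regenerating with update_cc_test_checks.py once the remaining queries land):

```
// Hypothetical clang/test/CodeGenCXX/spirv-global-var-address-space.cpp
// RUN: %clang_cc1 %s -triple spirv64-unknown-unknown -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -triple spirv64-amd-amdhsa -emit-llvm -o - | FileCheck %s

// With getGlobalVarAddressSpace implemented, a plain global with no explicit
// address space is expected to land in the global AS (addrspace(1)) for both
// SPIR-V flavours, rather than in AS 0.
// CHECK: @g = {{.*}}addrspace(1) global i32 42
int g = 42;
```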

https://github.com/llvm/llvm-project/pull/109415
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-24 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

I've created  
for the (potential) Translator work.

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-24 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx closed 
https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (PR #109415)

2024-09-24 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/109415
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (PR #109415)

2024-09-24 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx commented:

> Are there any tests available to check this behavior?

The reworked tests do verify / rely on this behaviour, but I can add an 
individual test for both vanilla and AMDGCN flavoured SPIR-V, if that is 
preferred (might be better anyway).

https://github.com/llvm/llvm-project/pull/109415
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-24 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

Thank you ever so much for the review @VyacheslavLevytskyy! I will create a PR 
for the Translator as well, since there's some handling missing there; I will 
refer to it here for future readers. Final check: are you OK with the OpenCL 
changes @yxsamliu?

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [lld] [llvm] [mlir] [IR] Introduce `T` to `DataLayout` to represent flat address space if a target supports it (PR #108786)

2024-09-24 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> I am personally not opposed to the DL solution, but I guess it's a big change 
> that feels too much for me to approve. I'm also worried the "flat" concept is 
> a special case of something more generic and we should instead define that. 
> E.g.,

In reply to your question above re whether this is a DL or a Target property, I 
don't have a strong opinion there (it appears @shiltian and @arsenm might). I 
do believe that this is a necessary bit of queryable information, especially 
from Clang, for correctness reasons (more on that below).

> * Does flat imply all AS can cast to it? If so, what happens if that is not 
> true anymore in the future. If it's not implied, how do we query what's 
> allowed and what is not?

To make this generically useful, it probably should, but I don't think the 
current wording implies it.

> * Is there a value in not defining 0 as flat? It's what we basically assume 
> everywhere already, isn't it?

Ah, this is part of the challenge - we do indeed assume that 0 is flat, but 
Targets aren't bound by LangRef to use 0 to denote flat, and some (like SPIR / 
SPIR-V) do not; rather, they problematically use 0 to denote Private, which is 
extremely constrained. This leads to problematic IR, due to the assumption you 
mentioned. An example I ran into the other day is 
[`emitCalloc`](https://github.com/llvm/llvm-project/blob/5dc15ddf575978e0115b1a6edacb59f056792a80/llvm/lib/Transforms/Utils/BuildLibCalls.cpp#L1989)
 - note that it just uses the default pointer type for its return, which is 
problematic unless 0 is flat(-ish). It is, of course, fixable via other means, 
but it's just an example of where we rely on 0 just working. 

Perhaps another solution(?) here would be to make LangRef binding in this 
regard, i.e. have it spell out that iff a particular Target has a Flat AS, 
that AS will be reflected in LLVM as AS0? 
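
To make the `emitCalloc` example concrete, this is the kind of fix-up a caller has to make today when AS 0 is not the target's flat AS (a minimal sketch using existing IRBuilder API, assuming opaque pointers and that the caller already knows the target's flat AS; it is not the actual BuildLibCalls code):

```
// Sketch: re-cast a default (AS 0) pointer into the target's flat AS before it
// can be used generically; e.g. FlatAS would be 4 for SPIR-V's generic AS.
llvm::Value *asFlatPointer(llvm::IRBuilderBase &B, llvm::Value *PtrAS0,
                           unsigned FlatAS) {
  llvm::PointerType *FlatPtrTy = llvm::PointerType::get(B.getContext(), FlatAS);
  if (PtrAS0->getType() == FlatPtrTy)
    return PtrAS0;
  return B.CreateAddrSpaceCast(PtrAS0, FlatPtrTy, "flat.cast");
}
```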



https://github.com/llvm/llvm-project/pull/108786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-23 Thread Alex Voicu via cfe-commits


@@ -766,8 +766,19 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr 
*Expr, Address Dest,
   // LLVM atomic instructions always have synch scope. If clang atomic
   // expression has no scope operand, use default LLVM synch scope.
   if (!ScopeModel) {
+llvm::SyncScope::ID SS;
+if (CGF.getLangOpts().OpenCL)
+  // OpenCL approach is: "The functions that do not have memory_scope
+  // argument have the same semantics as the corresponding functions with
+  // the memory_scope argument set to memory_scope_device." See ref.:
+  // 
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
+  SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(),

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-20 Thread Alex Voicu via cfe-commits


@@ -58,7 +58,35 @@ class SPIRVTargetCodeGenInfo : public 
CommonSPIRTargetCodeGenInfo {
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
   : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const 
override;
 };
+
+inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
+  switch (Scope) {
+  case SyncScope::HIPSingleThread:
+  case SyncScope::SingleScope:
+return "singlethread";
+  case SyncScope::HIPWavefront:
+  case SyncScope::OpenCLSubGroup:
+  case SyncScope::WavefrontScope:
+return "subgroup";
+  case SyncScope::HIPWorkgroup:
+  case SyncScope::OpenCLWorkGroup:
+  case SyncScope::WorkgroupScope:
+return "workgroup";
+  case SyncScope::HIPAgent:
+  case SyncScope::OpenCLDevice:
+  case SyncScope::DeviceScope:
+return "device";
+  case SyncScope::SystemScope:
+  case SyncScope::HIPSystem:
+  case SyncScope::OpenCLAllSVMDevices:
+return "";
+  }

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-20 Thread Alex Voicu via cfe-commits


@@ -1,7 +1,7 @@
 ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s 
--check-prefix=CHECK-SPIRV
 
 ; CHECK-SPIRV:  %[[#Int:]] = OpTypeInt 32 0
-; CHECK-SPIRV-DAG:  %[[#MemScope_Device:]] = OpConstant %[[#Int]] 1
+; CHECK-SPIRV-DAG:  %[[#MemScope_AllSvmDevices:]] = OpConstant %[[#Int]] 0

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-20 Thread Alex Voicu via cfe-commits


@@ -0,0 +1,163 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - 
| FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - 
-filetype=obj | spirv-val %}
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - 
| FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - 
-filetype=obj | spirv-val %}
+
+; CHECK: %[[#Int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#Scope_AllSVMDevices:]] = OpConstant %[[#Int]] 0
+; CHECK-DAG: %[[#Value:]] = OpConstant %[[#Int]] 42
+; CHECK-DAG: %[[#FPValue:]] = OpConstant %[[#Float]] 42
+; CHECK-DAG: %[[#Scope_Invocation:]] = OpConstant %[[#Int]] 4
+; CHECK-DAG: %[[#MemSem_SeqCst:]] = OpConstant %[[#Int]] 16
+; CHECK-DAG: %[[#Scope_Subgroup:]] = OpConstant %[[#Int]] 3
+; CHECK-DAG: %[[#Scope_Workgroup:]] = OpConstant %[[#Int]] 2
+; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1
+; CHECK-DAG: %[[#PointerType:]] = OpTypePointer CrossWorkgroup %[[#Int]]
+; CHECK-DAG: %[[#FPPointerType:]] = OpTypePointer CrossWorkgroup %[[#Float]]
+; CHECK-DAG: %[[#Pointer:]] = OpVariable %[[#PointerType]] CrossWorkgroup
+; CHECK-DAG: %[[#FPPointer:]] = OpVariable %[[#FPPointerType]] CrossWorkgroup
+
+@ui = common dso_local addrspace(1) global i32 0, align 4
+@f = common dso_local local_unnamed_addr addrspace(1) global float 
0.00e+00, align 4
+
+define dso_local spir_func void @test_singlethread_atomicrmw() 
local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %1 = atomicrmw xchg float addrspace(1)* @f, float 42.00e+00 
syncscope("singlethread") seq_cst
+  ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]]
+  %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+
+  ret void
+}
+
+define dso_local spir_func void @test_subgroup_atomicrmw() local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %1 = atomicrmw xchg float addrspace(1)* @f, float 42.00e+00 
syncscope("subgroup") seq_cst
+  ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] 
%[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]]
+  %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") 
seq_cst
+  ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] 
%[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+  %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+  ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] 
%[[#MemSem_SeqCst:]] %[[#Value:]]
+  %5 = atomicrmw xor

[clang] [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (PR #109415)

2024-09-20 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/109415

>From 75ca598c7e8a583545f50ee2c526556df261cc7f Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Fri, 20 Sep 2024 13:25:49 +0100
Subject: [PATCH 1/2] Implement `getGlobalVarAddressSpace` for SPIR-V; stop
 using SYCL for tests.

---
 clang/lib/Basic/Targets/SPIR.h|  5 
 clang/lib/CodeGen/Targets/SPIR.cpp| 23 
 .../CodeGenCXX/dynamic-cast-address-space.cpp | 23 
 .../test/CodeGenCXX/spirv-amdgcn-float16.cpp  | 27 ++-
 .../template-param-objects-address-space.cpp  |  4 +--
 ...w-expression-typeinfo-in-address-space.cpp |  2 +-
 .../try-catch-with-address-space.cpp  |  6 ++---
 .../typeid-cxx11-with-address-space.cpp   |  2 +-
 .../CodeGenCXX/typeid-with-address-space.cpp  |  4 +--
 .../typeinfo-with-address-space.cpp   | 14 +-
 .../vtable-assume-load-address-space.cpp  | 22 +++
 ...e-pointer-initialization-address-space.cpp |  2 +-
 clang/test/CodeGenCXX/vtt-address-space.cpp   |  2 +-
 .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl|  4 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   | 25 +++--
 15 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac5..4dbc64d27ba909 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -380,6 +380,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 PointerWidth = PointerAlign = 64;
 SizeType = TargetInfo::UnsignedLong;
 PtrDiffType = IntPtrType = TargetInfo::SignedLong;
+AddrSpaceMap = &SPIRDefIsGenMap;
 
 resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
 "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
@@ -412,6 +413,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 
   void setAuxTarget(const TargetInfo *Aux) override;
 
+  void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+TargetInfo::adjust(Diags, Opts);
+  }
+
   bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
 };
 
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp 
b/clang/lib/CodeGen/Targets/SPIR.cpp
index cc52925e2e523f..975b48afa03fcd 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -58,6 +58,8 @@ class SPIRVTargetCodeGenInfo : public 
CommonSPIRTargetCodeGenInfo {
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
   : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
+  const VarDecl *D) const override;
 };
 } // End anonymous namespace.
 
@@ -188,6 +190,27 @@ void 
SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   }
 }
 
+LangAS SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(
+CodeGenModule &CGM, const VarDecl *D) const {
+  assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+  // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
+  // the global AS we can rely on either cuda_device or sycl_global to be
+  // correct; however, since this is not a CUDA Device context, we use
+  // sycl_global to prevent confusion with the assertion.
+  LangAS DefaultGlobalAS = getLangASFromTargetAS(
+  CGM.getContext().getTargetAddressSpace(LangAS::sycl_global));
+  if (!D)
+return DefaultGlobalAS;
+
+  LangAS AddrSpace = D->getType().getAddressSpace();
+  if (AddrSpace != LangAS::Default)
+return AddrSpace;
+
+  return DefaultGlobalAS;
+}
+
 /// Construct a SPIR-V target extension type for the given OpenCL image type.
 static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef 
BaseType,
  StringRef OpenCLName,
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp 
b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 3d5e32516c7af2..b967701ca1fa96 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals all --no-generate-body-for-unused-prefixes --version 4
 // RUN: %clang_cc1 -I%S %s -triple amdgcn-amd-amdhsa -emit-llvm 
-fcxx-exceptions -fexceptions -o - | FileCheck %s
-// RUN: %clang_cc1 -I%S %s -triple spirv64-unknown-unknown -fsycl-is-device 
-emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s 
--check-prefix=WITH-NONZERO-DEFAULT-AS
+// RUN: %clang_cc1 -I%S %s -triple spirv64-amd-amdhsa -emit-llvm 
-fcxx-exceptions -fexceptions -o - | FileCheck %s 
--check-prefix=WITH-NONZERO-DEFAULT-AS
 
 struct A { virtual voi

[clang] [Clang][NFC] Remove incorrect SYCL tests (PR #109182)

2024-09-20 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> > Ah, I remember now why the SYCL flag / mode was abused here (the default AS 
> > Map for SPIR-V is problematic in this case); I believe that using the 
> > `spirv64-amd-amdhsa` triple instead of `spirv64-unknown-unknown` will work, 
> > and allow for the removal of the SYCL references.
> 
> I tried changing the triple. The address spaces changed for several tests but 
> I do not know if change is correct or not since I am very unfamiliar with 
> what the expected behavior here is. I have uploaded the changes for your 
> review. Could you please verify the address space changes are correct. I do 
> not want to be testing for the wrong thing in the CHECK lines.
> 
> In some tests, using the new triple causes the test to crash. For those, I 
> just removed the test itself.
> 
> Personally I would like to submit the PR to remove the incorrect SYCL tests 
> without changing the triple (unless you can verify these new test changes are 
> correct), and have someone more familiar with this area of code to follow-up 
> with the right tests for #88182

Apologies for the delay. Please see #109415 which addresses this matter. Thank 
you.

https://github.com/llvm/llvm-project/pull/109182
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (PR #109415)

2024-09-20 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/109415
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen][SPIR-V] (PR #109415)

2024-09-20 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx created 
https://github.com/llvm/llvm-project/pull/109415

This is primarily meant to address the issue identified in #109182, around 
incorrect usage of `-fsycl-is-device`; we now have AMDGCN flavoured SPIR-V 
which retains the desired behaviour around the default AS and does not depend 
on the SYCL language being enabled to do so. Overall, there are three changes:

1. We unconditionally use the `SPIRDefIsGen` AS map for the AMDGCNSPIRV target, as 
there is no case where the hack of setting the default to private would be 
desirable, and it can be used for languages other than OCL/HIP;
2. We implement `SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace` for SPIR-V 
in general, because otherwise using it from languages other than HIP or OpenCL 
would yield 0, incorrectly;
3. We remove the incorrect usage of `-fsycl-is-device`.
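
As a concrete illustration of changes 1 and 2 (sketch only; the authoritative checks are in the reworked tests below):

```
// Compiled with -triple spirv64-amd-amdhsa, no -fsycl-is-device required.
// With getGlobalVarAddressSpace in place, a plain C++ global is expected to be
// emitted in the global AS, e.g.
//   @x = {{.*}} addrspace(1) global i32 0
// and the typeinfo / vtable globals checked by the tests below (e.g. @_ZTI1A)
// likewise end up in addrspace(1) instead of AS 0.
int x;
```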

>From 75ca598c7e8a583545f50ee2c526556df261cc7f Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Fri, 20 Sep 2024 13:25:49 +0100
Subject: [PATCH] Implement `getGlobalVarAddressSpace` for SPIR-V; stop using
 SYCL for tests.

---
 clang/lib/Basic/Targets/SPIR.h|  5 
 clang/lib/CodeGen/Targets/SPIR.cpp| 23 
 .../CodeGenCXX/dynamic-cast-address-space.cpp | 23 
 .../test/CodeGenCXX/spirv-amdgcn-float16.cpp  | 27 ++-
 .../template-param-objects-address-space.cpp  |  4 +--
 ...w-expression-typeinfo-in-address-space.cpp |  2 +-
 .../try-catch-with-address-space.cpp  |  6 ++---
 .../typeid-cxx11-with-address-space.cpp   |  2 +-
 .../CodeGenCXX/typeid-with-address-space.cpp  |  4 +--
 .../typeinfo-with-address-space.cpp   | 14 +-
 .../vtable-assume-load-address-space.cpp  | 22 +++
 ...e-pointer-initialization-address-space.cpp |  2 +-
 clang/test/CodeGenCXX/vtt-address-space.cpp   |  2 +-
 .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl|  4 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   | 25 +++--
 15 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac5..4dbc64d27ba909 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -380,6 +380,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 PointerWidth = PointerAlign = 64;
 SizeType = TargetInfo::UnsignedLong;
 PtrDiffType = IntPtrType = TargetInfo::SignedLong;
+AddrSpaceMap = &SPIRDefIsGenMap;
 
 resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
 "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
@@ -412,6 +413,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 
   void setAuxTarget(const TargetInfo *Aux) override;
 
+  void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+TargetInfo::adjust(Diags, Opts);
+  }
+
   bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
 };
 
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp 
b/clang/lib/CodeGen/Targets/SPIR.cpp
index cc52925e2e523f..975b48afa03fcd 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -58,6 +58,8 @@ class SPIRVTargetCodeGenInfo : public 
CommonSPIRTargetCodeGenInfo {
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
   : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
+  const VarDecl *D) const override;
 };
 } // End anonymous namespace.
 
@@ -188,6 +190,27 @@ void 
SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   }
 }
 
+LangAS SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(
+CodeGenModule &CGM, const VarDecl *D) const {
+  assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+  // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
+  // the global AS we can rely on either cuda_device or sycl_global to be
+  // correct; however, since this is not a CUDA Device context, we use
+  // sycl_global to prevent confusion with the assertion.
+  LangAS DefaultGlobalAS = getLangASFromTargetAS(
+  CGM.getContext().getTargetAddressSpace(LangAS::sycl_global));
+  if (!D)
+return DefaultGlobalAS;
+
+  LangAS AddrSpace = D->getType().getAddressSpace();
+  if (AddrSpace != LangAS::Default)
+return AddrSpace;
+
+  return DefaultGlobalAS;
+}
+
 /// Construct a SPIR-V target extension type for the given OpenCL image type.
 static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef 
BaseType,
  StringRef OpenCLName,
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp 
b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 3d5e32516c7af2..b967701ca1fa96 100644
--- 

[clang] [lld] [llvm] [mlir] [IR] Introduce `T` to `DataLayout` to represent flat address space if a target supports it (PR #108786)

2024-09-19 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> +1 to @efriedma-quic and @jdoerfert's comments. DataLayout should remain as 
> generic as possible. Trying to encode a concept of "_the_ flat address space" 
> in it seems way too specific to one optimization for one or two targets.

This isn't purely a nice-to-have optimisation aid though, is it? There are 
cases where you need a safe (portable?) default that the target can perhaps 
optimise but, absent that, would at least work, and at the moment there's no 
handy way to query that generically (or from Clang). We do handwave 0 as being 
that safe default, and hope for the best, but as mentioned that relies on 
targets using 0 to correspond to flat/generic/whatever we call it, which they 
are not bound to do. To me, adding this is not entirely different from encoding 
the Alloca AS, which we already do.
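
A minimal sketch of that analogy: the alloca AS is already queryable from the DataLayout (the 'A' spec); `getFlatAddressSpace()` below is the accessor proposed in this PR (the 'T' spec), so it does not exist in trunk today and its exact shape is still under discussion:

```
// Sketch only: getAllocaAddrSpace() is existing API; getFlatAddressSpace() is
// the accessor proposed here, not something that exists in trunk today.
llvm::PointerType *defaultLibcallPtrTy(llvm::LLVMContext &Ctx,
                                       const llvm::DataLayout &DL) {
  // Ask the target what its flat AS is, rather than hard-coding 0, mirroring
  // how passes already ask for the alloca AS via DL.getAllocaAddrSpace().
  unsigned FlatAS = DL.getFlatAddressSpace();
  return llvm::PointerType::get(Ctx, FlatAS);
}
```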

https://github.com/llvm/llvm-project/pull/108786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-18 Thread Alex Voicu via cfe-commits


@@ -33,7 +33,8 @@
 #include "llvm/Support/Debug.h"
 
 namespace {

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-18 Thread Alex Voicu via cfe-commits


@@ -251,6 +251,24 @@ SPIRV::MemorySemantics::MemorySemantics 
getMemSemantics(AtomicOrdering Ord) {
   llvm_unreachable(nullptr);
 }
 
+SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID) {

AlexVlx wrote:

I've taken this suggestion, but typed it out slightly strangely (I can re-work 
it if there are objections) to keep the IDs still together / allow for easy 
hoisting if it turns out we want to re-use them elsewhere.

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][NFC] Remove incorrect SYCL tests (PR #109182)

2024-09-18 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> > Instead of removing the test case altogether, does it not suffice to remove 
> > the SYCL reference / SYCL specific flag, which I interpret as the problem 
> > being addressed? SYCL per se is not a target, but rather a language? 
> > Apologies if I'm missing something.
> 
> I tried that but there was no address spaces then. I wasn't sure if the tests 
> were relevant anymore in that case. I'll paste a snippet of what I saw below -
> 
> ```
> ; Function Attrs: mustprogress noinline optnone
> define spir_func noundef nonnull align 8 dereferenceable(8) ptr @_Z1fP1A(ptr 
> noundef %a) #0 personality ptr @__gxx_personality_v0 {
> entry:
>   %a.addr = alloca ptr, align 8
>   %exn.slot = alloca ptr, align 8
>   %ehselector.slot = alloca i32, align 4
>   store ptr %a, ptr %a.addr, align 8
>   %0 = load ptr, ptr %a.addr, align 8
>   %1 = call spir_func ptr @__dynamic_cast(ptr %0, ptr addrspace(1) @_ZTI1A, 
> ptr addrspace(1) @_ZTI1B, i64 0) #3
>   %2 = icmp eq ptr %1, null
>   br i1 %2, label %dynamic_cast.bad_cast, label %dynamic_cast.end
> 
> dynamic_cast.bad_cast:; preds = %entry
>   invoke spir_func void @__cxa_bad_cast() #4
>   to label %invoke.cont unwind label %lpad
> 
> invoke.cont:  ; preds = 
> %dynamic_cast.bad_cast
>   unreachable
> 
> dynamic_cast.end: ; preds = %entry
>   br label %try.cont
> 
> lpad: ; preds = 
> %dynamic_cast.bad_cast
>   %3 = landingpad { ptr, i32 }
>   catch ptr null
>   %4 = extractvalue { ptr, i32 } %3, 0
>   store ptr %4, ptr %exn.slot, align 8
>   %5 = extractvalue { ptr, i32 } %3, 1
>   store i32 %5, ptr %ehselector.slot, align 4
>   br label %catch
> ```
> 
> vs what the test was checking
> 
> ```
> // WITH-NONZERO-DEFAULT-AS-LABEL: define spir_func noundef align 8 
> dereferenceable(8) ptr addrspace(4) @_Z1fP1A(
> // WITH-NONZERO-DEFAULT-AS-SAME: ptr addrspace(4) noundef [[A:%.*]]) 
> #[[ATTR0:[0-9]+]] personality ptr @__gxx_personality_v0 {
> // WITH-NONZERO-DEFAULT-AS-NEXT:  entry:
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[RETVAL:%.*]] = alloca ptr addrspace(4), 
> align 8
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[A_ADDR:%.*]] = alloca ptr addrspace(4), 
> align 8
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[EXN_SLOT:%.*]] = alloca ptr 
> addrspace(4), align 8
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[EHSELECTOR_SLOT:%.*]] = alloca i32, 
> align 4
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr 
> [[RETVAL]] to ptr addrspace(4)
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr 
> [[A_ADDR]] to ptr addrspace(4)
> // WITH-NONZERO-DEFAULT-AS-NEXT:store ptr addrspace(4) [[A]], ptr 
> addrspace(4) [[A_ADDR_ASCAST]], align 8
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[TMP0:%.*]] = load ptr addrspace(4), ptr 
> addrspace(4) [[A_ADDR_ASCAST]], align 8
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[TMP1:%.*]] = call spir_func ptr 
> addrspace(4) @__dynamic_cast(ptr addrspace(4) [[TMP0]], ptr addrspace(1) 
> @_ZTI1A, ptr addrspace(1) @_ZTI1B, i64 0) #[[ATTR3:[0-9]+]]
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[TMP2:%.*]] = icmp eq ptr addrspace(4) 
> [[TMP1]], null
> // WITH-NONZERO-DEFAULT-AS-NEXT:br i1 [[TMP2]], label 
> [[DYNAMIC_CAST_BAD_CAST:%.*]], label [[DYNAMIC_CAST_END:%.*]]
> // WITH-NONZERO-DEFAULT-AS:   dynamic_cast.bad_cast:
> // WITH-NONZERO-DEFAULT-AS-NEXT:invoke spir_func void @__cxa_bad_cast() 
> #[[ATTR4:[0-9]+]]
> // WITH-NONZERO-DEFAULT-AS-NEXT:to label [[INVOKE_CONT:%.*]] 
> unwind label [[LPAD:%.*]]
> // WITH-NONZERO-DEFAULT-AS:   invoke.cont:
> // WITH-NONZERO-DEFAULT-AS-NEXT:unreachable
> // WITH-NONZERO-DEFAULT-AS:   dynamic_cast.end:
> // WITH-NONZERO-DEFAULT-AS-NEXT:br label [[TRY_CONT:%.*]]
> // WITH-NONZERO-DEFAULT-AS:   lpad:
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[TMP3:%.*]] = landingpad { ptr 
> addrspace(4), i32 }
> // WITH-NONZERO-DEFAULT-AS-NEXT:catch ptr addrspace(4) null
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[TMP4:%.*]] = extractvalue { ptr 
> addrspace(4), i32 } [[TMP3]], 0
> // WITH-NONZERO-DEFAULT-AS-NEXT:store ptr addrspace(4) [[TMP4]], ptr 
> [[EXN_SLOT]], align 8
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[TMP5:%.*]] = extractvalue { ptr 
> addrspace(4), i32 } [[TMP3]], 1
> // WITH-NONZERO-DEFAULT-AS-NEXT:store i32 [[TMP5]], ptr 
> [[EHSELECTOR_SLOT]], align 4
> // WITH-NONZERO-DEFAULT-AS-NEXT:br label [[CATCH:%.*]]
> // WITH-NONZERO-DEFAULT-AS:   catch:
> // WITH-NONZERO-DEFAULT-AS-NEXT:[[EXN:%.*]] = load ptr addrspace(4), ptr 
> [[EXN_SLOT]], align 
> ```

Ah, I remember now why the SYCL flag / mode was abused here (the default AS Map 
for SPIR-V is problematic in this case); I believe that using the 
`spirv64-amd-amdhsa` triple instead of `spirv64-unknown-unknown` will work, and 
allow for the removal of the SYCL references. 

https://github.com/llvm/l

[clang] [Clang][NFC] Remove incorrect SYCL tests (PR #109182)

2024-09-18 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

Instead of removing the test case altogether, does it not suffice to remove 
the SYCL reference / SYCL-specific flag, which I interpret as the problem being 
addressed? SYCL per se is not a target, but rather a language. Apologies if I'm 
missing something.

https://github.com/llvm/llvm-project/pull/109182
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-18 Thread Alex Voicu via cfe-commits


@@ -766,8 +766,19 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr 
*Expr, Address Dest,
   // LLVM atomic instructions always have synch scope. If clang atomic
   // expression has no scope operand, use default LLVM synch scope.
   if (!ScopeModel) {
+llvm::SyncScope::ID SS;
+if (CGF.getLangOpts().OpenCL)
+  // OpenCL approach is: "The functions that do not have memory_scope
+  // argument have the same semantics as the corresponding functions with
+  // the memory_scope argument set to memory_scope_device." See ref.:
+  // 
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
+  SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(),
+   SyncScope::OpenCLDevice,
+   Order, 
CGF.getLLVMContext());
+else
+  SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [lld] [llvm] [mlir] [IR] Introduce `T` to `DataLayout` to represent flat address space if a target supports it (PR #108786)

2024-09-16 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> I updated the PR description. Hopefully it can make the motivation of this 
> patch more clear. I think the central question is, why it can't be just 
> address space 0. I don't have a clear answer to that. @arsenm first 
> introduced this concept to LLVM at least 8 years ago. I agree that the only 
> two targets that have this concept are AMDGPU and NVPTX, and they both set it 
> to address space 0. My understanding is, LLVM doesn't force any other 
> characteristics to address 0 in addition to just making it as the default one 
> if there is no other specified for the allocation of certain variables, so we 
> can't assert or assume how a target needs to implement it.

There are targets that use a different integer to denote flat (e.g. see SPIR & 
SPIR-V). Whilst I know that there are objections to that, the fact remains that 
they had historical reasons (they wanted to make the legacy OCL convention that 
the default is private work, and given that IR defaults to 0 this was an easy, 
if possibly costly, way out; AMDGPU also borks this for legacy OCL reasons, 
which has been a source of pain). I think the core matter, which we also talked 
about in #95728, is that LLVM defaults to 0 and posits some guarantees around 
that as per LangRef, but it doesn't say much of anything about guaranteed 
convertibility to / from 0. This turns out to be a problem for targets / 
pseudo-targets which do not also use 0 to denote flat/generic, as a number of 
constructs that merely rely on getting the default end up needing a cast. 
@efriedma-quic made the point that in general there should / would always be a 
correct AS and the solution is to emit these in the right AS from the get-go, 
but the problem is somewhat pervasive.

https://github.com/llvm/llvm-project/pull/108786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [lld] [llvm] [mlir] [IR] Introduce `T` to `DataLayout` to represent flat address space if a target supports it (PR #108786)

2024-09-16 Thread Alex Voicu via cfe-commits


@@ -3050,6 +3050,19 @@ as follows:
 address space 0, this property only affects the default value to be used
 when creating globals without additional contextual information (e.g. in
 LLVM passes).
+``T``
+Specifies the address space for a target's 'flat' address space. Note this
+is not necessarily the same as addrspace 0, which LLVM sometimes refers to
+as the generic address space. The flat address space is a generic address
+space that can be used access multiple segments of memory with different
+address spaces. Access of a memory location through a pointer with this
+address space is expected to be legal but slower compared to the same 
memory
+location accessed through a pointer with a different address space. This is
+for targets with different pointer representations which can be converted
+with the addrspacecast instruction. If a pointer is converted to this
+address space, optimizations should attempt to replace the access with the
+source address space. The absence of this specification indicates the 
target
+does not have such a flat address space to optimize away.

AlexVlx wrote:

I don't think the latter has to be true, as there can be some degree of nesting 
for address spaces (for example, some targets have the constant AS simply be 
global + additional guarantees, but the two can physically / conceptually 
alias).

https://github.com/llvm/llvm-project/pull/108786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [lld] [llvm] [mlir] [IR] Introduce `T` to `DataLayout` to represent flat address space if a target supports it (PR #108786)

2024-09-16 Thread Alex Voicu via cfe-commits


@@ -245,6 +246,7 @@ class DataLayout {
   unsigned getDefaultGlobalsAddressSpace() const {
 return DefaultGlobalsAddrSpace;
   }
+  unsigned getFlatAddressSpace() const { return FlatAddressSpace; }

AlexVlx wrote:

I think that for general ergonomics, making this an `optional` is preferable to 
a magic constant + comment. That'd unambiguously convey absence, and it's what 
optional is for. Whilst incredibly unlikely, it is currently not impossible for 
some future target to decide that AS UINT32_MAX carries semantic meaning, and 
then run into trouble around this.
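
A sketch of the suggested shape (hypothetical class standing in for DataLayout, just to show the accessor):

```
#include <optional>

// Sketch: absence of a flat AS is conveyed by an empty optional rather than a
// sentinel value such as ~0U / UINT32_MAX.
class DataLayoutLike {
  std::optional<unsigned> FlatAddressSpace; // unset when no 'T' spec is present
public:
  std::optional<unsigned> getFlatAddressSpace() const { return FlatAddressSpace; }
};
```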

https://github.com/llvm/llvm-project/pull/108786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-16 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> @AlexVlx I don't see much objections against #108528 on a conceptual level, 
> so what do you think about merging it into this PR in a way that I commented 
> above, by changing `getMemScope()` and moving `getOrInsertSyncScopeID()` into 
> its static vars initialization?

At a glance it seems fine, thank you for working through this, but since I've 
been away for a couple of days let me page things back in first :)

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-16 Thread Alex Voicu via cfe-commits


@@ -58,7 +58,35 @@ class SPIRVTargetCodeGenInfo : public 
CommonSPIRTargetCodeGenInfo {
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
   : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const 
override;
 };
+
+inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
+  switch (Scope) {
+  case SyncScope::HIPSingleThread:
+  case SyncScope::SingleScope:
+return "singlethread";
+  case SyncScope::HIPWavefront:
+  case SyncScope::OpenCLSubGroup:
+  case SyncScope::WavefrontScope:
+return "subgroup";
+  case SyncScope::HIPWorkgroup:
+  case SyncScope::OpenCLWorkGroup:
+  case SyncScope::WorkgroupScope:
+return "workgroup";
+  case SyncScope::HIPAgent:
+  case SyncScope::OpenCLDevice:
+  case SyncScope::DeviceScope:
+return "device";
+  case SyncScope::SystemScope:
+  case SyncScope::HIPSystem:
+  case SyncScope::OpenCLAllSVMDevices:
+return "all_svm_devices";

AlexVlx wrote:

> +1, we should align on the scope string names. Since we may have different 
> languages with different naming for the scope it makes sense to take names 
> from the common specification eg SPIR-V. So probably we should rename 
> `sub_group` (or `subgroup`) to `Subgroup` etc; and `singlethread` should be 
> aliased with `Invocation`.

I'd object to using CamelCase for the IR representation (re-using the SPIR-V 
naming is fine, which is where `subgroup` vs `sub_group` originated) on the 
grounds that we don't really use that style anywhere else in IR, and other 
targets have already gone snake_case.
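
Whatever spelling is picked, in IR it is just a named scope registered on the context and later interpreted by the target; a minimal sketch:

```
// Sketch: the chosen string (e.g. "subgroup" rather than "sub_group" or
// "Subgroup") simply becomes a SyncScope::ID that the backend lowers to the
// corresponding SPIR-V scope.
llvm::SyncScope::ID getSubgroupScope(llvm::LLVMContext &Ctx) {
  return Ctx.getOrInsertSyncScopeID("subgroup");
}
```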

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-16 Thread Alex Voicu via cfe-commits


@@ -251,6 +251,24 @@ SPIRV::MemorySemantics::MemorySemantics 
getMemSemantics(AtomicOrdering Ord) {
   llvm_unreachable(nullptr);
 }
 
+SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID) {
+  SmallVector SSNs;
+  Ctx.getSyncScopeNames(SSNs);
+
+  StringRef MemScope = SSNs[ID];
+  if (MemScope.empty() || MemScope == "all_svm_devices")

AlexVlx wrote:

On consideration, I think I'm with @arsenm on this one, both `singlethread` and 
the empty string (system) have a defined meaning in LLVM, with all else being 
target specific. So, IMHO, for these we actually should think about LLVM's 
default, and how the target implements those, whilst for everything else we 
should defer to the target.
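
A minimal IRBuilder sketch of the two LLVM-defined scopes referred to above (everything else is a target- or language-specific named scope):

```
// Sketch: SyncScope::SingleThread ("singlethread") and SyncScope::System (the
// empty string) are the only scopes LangRef itself defines; named scopes such
// as "device" or "subgroup" are target-defined.
void emitLLVMDefinedScopes(llvm::IRBuilderBase &B, llvm::Value *Ptr,
                           llvm::Value *Val) {
  using llvm::AtomicOrdering;
  using llvm::AtomicRMWInst;
  B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, llvm::MaybeAlign(4),
                    AtomicOrdering::SequentiallyConsistent,
                    llvm::SyncScope::SingleThread);
  B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, llvm::MaybeAlign(4),
                    AtomicOrdering::SequentiallyConsistent,
                    llvm::SyncScope::System);
}
```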

https://github.com/llvm/llvm-project/pull/106429
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-09 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> Thank you for the PR! I'd like to better understand motivation and 
> justification of SPIR-V BE-related changes though. The goal would be to 
> understand whether AllSvmDevices is indeed a better choice (for whom?) than 
> Device as a default mem scope value in SPIR-V BE.
> 
> 1. Questions to the description of the PR.
> 
> > "These were previously unconditionally lowered to Device scope, which is 
> > can be too conservative and possibly incorrect."
> 
> The claim is not justified by any docs/specs. Why Device scope is incorrect 
> as a default? In my opinion, it's AllSvmDevices that looks like a 
> conservative choice that may lead to performance degradation in general case 
> when we change the default without notifying customers. Or, we may say that 
> potential performance changes may depend on a vendor-specific behavior in 
> this case.
> 
> > "Furthermore, the default / implicit scope is changed from Device (an 
> > OpenCL assumption) to AllSvmDevices (aka System), since the SPIR-V BE is 
> > not OpenCL specific / can ingest IR coming from other language front-ends."
> 
> What I know without additional references to other docs/specs is that Device 
> is default by OpenCL spec 
> (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions).
>  It would help if you can provide references where AllSvmDevices is a 
> preferable choice, so that we are able to compare and figure out the best 
> default for the Computational flavor of SPIR-V. For sure, SPIR-V BE is not 
> OpenCL (=Device) specific, and it's also not specific to any particular 
> vendor or computational framework. I've seen usages of AllSvmDevices as 
> default in the code base (for example, in
> 
> https://github.com/llvm/llvm-project/blob/319e8cd201e6744199da377fba237dd276063e49/clang/lib/CodeGen/Targets/AMDGPU.cpp#L537
> 
> ), but it seems not enough to flip the default over.
> > "OpenCL defaulting to Device scope is now reflected in the front-end 
> > handling of atomic ops, which seems preferable."
> 
> Changes in clang part looks really good to me. However, when we add to it 
> changes in SPIR-V part of the code base, things look less optimistic, because 
> what this PR means by "the front-end handling of atomic ops" is the upstream 
> clang only, whereas actual choices of a front-end are more versatile, and 
> users coming to SPIR-V by other paths would get a sudden change of behavior 
> in the worst case (e.g., MLIR input for the GenAI domain).
> 
> ===
> 
> 2. If it's acceptable to split this PR into two separate PR's (clang and 
> SPIR-V), I'd gladly support changes in clang part, it makes sense for me. At 
> the moment, however, I have objections against SPIR-V Backend changes as they 
> are represented in the PR:
> 
> * This PR looks like a breaking change that would flip over the default value 
> of mem scope for all environments except for OpenCL and may have a 
> potentially negative impact on an unknown number of projects/customers. I'd 
> guess that OpenCL would not notice the difference, because path that goes via 
> upstream clang front-end redefines default mem scope as Device. All other 
> toolchains just get a breaking change in the form of the AllSvmDevices 
> default. clang-related changes do not help to smooth this, because SPIRV BE 
> should remain agnostic towards front-ends, frameworks, etc.
> * A technical comment is that the proposed implementation in the SPIR-V part is 
> less efficient than the existing one. It compares strings rather than integers and 
> fills in scope names on each call to the getMemScope() function, whereas the 
> existing implementation does it just once per machine function.
> * A terminology (the choice of syncscope names) is debatable. The closest 
> thing in specs that I see is 
> https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id. I 
> don't see any references to "singlethread" in the specs. Name "workitem" 
> (spelling precisely as "work-item") is used at least in the official Khronos 
> documents (see for example 
> https://registry.khronos.org/SPIR-V/specs/1.0/SPIR-V-execution-and-memory-model.pdf).
>  "all_svm_devices" is not mentioned in the specs at all (there is only the 
> "CrossDevice" term).
> 
> ===
> 
> For now, I'd rather see an eventual solution in the form of further 
> classification of the computational flavor of SPIR-V (not just Compute vs. 
> Vulkan, but breaking the Compute part down further where required) than this 
> sudden change of the default in favor of any one incarnation of Compute 
> targets. As a first approach, all the SPIR-V-related changes might need is a 
> short snippet of the form "if TheTriple is XXX-specific then use CrossDevice 
> instead of Device" and a minor renaming of syncscope names ("subgroup", for 
> example, indeed makes more sense than "sub_group"). This would probably 
> require a description in the SPIRVUsage doc as well, to avoid confusion among 
> customers. Anyway, I'd be
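
For context on the naming debate above, here is a small illustrative C++ sketch
(not the PR's implementation) of how named syncscopes could map onto the Scope
values defined by the SPIR-V specification; the string spellings are exactly
the ones in question and should be read as placeholders rather than settled
names.

```cpp
#include <cstdint>
#include <optional>
#include <string_view>

// Scope values as defined by the SPIR-V specification.
enum class SPIRVScope : uint32_t {
  CrossDevice = 0, // the spec term; "all_svm_devices" does not appear in the spec
  Device = 1,
  Workgroup = 2,
  Subgroup = 3,
  Invocation = 4   // closest spec notion to "singlethread" / "work-item"
};

// Placeholder mapping from LLVM syncscope names to SPIR-V scopes.
std::optional<SPIRVScope> scopeForSyncScopeName(std::string_view Name) {
  if (Name.empty() || Name == "all_svm_devices") // "" is LLVM's system scope
    return SPIRVScope::CrossDevice;
  if (Name == "device")
    return SPIRVScope::Device;
  if (Name == "workgroup")
    return SPIRVScope::Workgroup;
  if (Name == "subgroup")
    return SPIRVScope::Subgroup;
  if (Name == "singlethread")
    return SPIRVScope::Invocation;
  return std::nullopt; // unrecognised named scope
}
```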

[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-09 Thread Alex Voicu via cfe-commits


@@ -766,8 +766,17 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr 
*Expr, Address Dest,
   // LLVM atomic instructions always have synch scope. If clang atomic
   // expression has no scope operand, use default LLVM synch scope.
   if (!ScopeModel) {
+llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");
+if (CGF.getLangOpts().OpenCL)
+  // OpenCL approach is: "The functions that do not have memory_scope

AlexVlx wrote:

This is the primary entry point for Atomic emission, so things like the Clang 
builtins (which do not carry scopes) would end up here.
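
A minimal sketch of the idea under discussion, assuming the rest of EmitAtomicOp
in clang/lib/CodeGen/CGAtomic.cpp stays as shown above; the "device" syncscope
name is an assumption here, standing in for whatever name the target maps to an
OpenCL device-scope operation.

```cpp
// Sketch: pick a language-appropriate default syncscope for atomics that
// carry no explicit scope operand, instead of always using LLVM's system
// scope (""). Assumed to live next to EmitAtomicOp in CGAtomic.cpp.
static llvm::SyncScope::ID
getDefaultSyncScopeID(clang::CodeGen::CodeGenFunction &CGF) {
  // OpenCL atomic functions without a memory_scope argument default to
  // device scope; everything else keeps the system ("") scope.
  llvm::StringRef Name = CGF.getLangOpts().OpenCL ? "device" : "";
  return CGF.getLLVMContext().getOrInsertSyncScopeID(Name);
}
```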

https://github.com/llvm/llvm-project/pull/106429


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-04 Thread Alex Voicu via cfe-commits


@@ -188,6 +192,41 @@ void 
SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   }
 }
 
+llvm::SyncScope::ID
+SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope 
Scope,
+   llvm::AtomicOrdering,
+   llvm::LLVMContext &Ctx) const {
+  std::string Name;
+  switch (Scope) {
+  case SyncScope::HIPSingleThread:
+  case SyncScope::SingleScope:
+Name = "singlethread";
+break;

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-04 Thread Alex Voicu via cfe-commits


@@ -188,6 +192,41 @@ void 
SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   }
 }
 
+llvm::SyncScope::ID
+SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope 
Scope,
+   llvm::AtomicOrdering,
+   llvm::LLVMContext &Ctx) const {
+  std::string Name;

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-04 Thread Alex Voicu via cfe-commits


@@ -766,8 +766,17 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr 
*Expr, Address Dest,
   // LLVM atomic instructions always have synch scope. If clang atomic
   // expression has no scope operand, use default LLVM synch scope.
   if (!ScopeModel) {
+llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/106429


[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)

2024-09-04 Thread Alex Voicu via cfe-commits


@@ -335,6 +335,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public 
BaseSPIRVTargetInfo {
 PointerWidth = PointerAlign = 32;
 SizeType = TargetInfo::UnsignedInt;
 PtrDiffType = IntPtrType = TargetInfo::SignedInt;
+// SPIR-V has core support for atomic ops, and Int32 is always available;
+// we take the maximum because it's possible the Host supports wider types.
+MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, 32);

AlexVlx wrote:

I'm assuming that the SPIRV32 target exists for cases where the `Int64` 
capability is never enabled, but it would probably be useful to have that 
assumption checked. For SPIR-V, the model for extensions / capabilities in LLVM 
seems to be push-based, i.e. extensions get enabled / checked iff a feature 
requiring the extension / capability is encountered when translating (the 
legacy path) / lowering (the experimental BE). FWIW, my reading of the SPIR-V 
spec is that the `Int64` capability is core.
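
A minimal sketch of that assumption (not the actual TargetInfo code): core
SPIR-V guarantees 32-bit atomics, and the 64-bit flavour is assumed here to be
able to rely on Int64 for 64-bit ones, so the targets only ever raise the
advertised width and leave the host/aux target free to raise it further.

```cpp
#include <algorithm>

// Sketch of how the SPIR-V targets could clamp the inline atomic width from
// below, per the discussion above.
struct SPIRVAtomicWidthsSketch {
  unsigned MaxAtomicInlineWidth = 0; // may be raised later by the host/aux target

  void adjustForSPIRV(bool Is64Bit) {
    // Never advertise less than what core SPIR-V guarantees.
    MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, Is64Bit ? 64u : 32u);
  }
};
```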

https://github.com/llvm/llvm-project/pull/106429


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-21 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx closed 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-20 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/102776

>From d41faf6da8a9eed8c32f6a62fa9ebf38d5824c2c Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Sun, 11 Aug 2024 01:39:46 +0300
Subject: [PATCH 1/3] Tweak AMDGCNSPIRV ABI to allow for the correct handling
 of aggregates passed to kernels / functions.

---
 clang/lib/CodeGen/Targets/SPIR.cpp|  73 +-
 .../amdgpu-kernel-arg-pointer-type.cu | 723 --
 clang/test/CodeGenCUDA/kernel-args.cu |   6 +
 3 files changed, 731 insertions(+), 71 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp 
b/clang/lib/CodeGen/Targets/SPIR.cpp
index cf068cbc4fcd36..1319332635b863 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -32,7 +32,9 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
   void computeInfo(CGFunctionInfo &FI) const override;
 
 private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+  ABIArgInfo classifyArgumentType(QualType Ty) const;
 };
 } // end anonymous namespace
 namespace {
@@ -64,6 +66,27 @@ void CommonSPIRABIInfo::setCCs() {
   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
 }
 
+ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+return DefaultABIInfo::classifyReturnType(RetTy);
+  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+return DefaultABIInfo::classifyReturnType(RetTy);
+
+  if (const RecordType *RT = RetTy->getAs()) {
+const RecordDecl *RD = RT->getDecl();
+if (RD->hasFlexibleArrayMember())
+  return DefaultABIInfo::classifyReturnType(RetTy);
+  }
+
+  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
+  // avoid encoding various architecture specific bits here we return 
everything
+  // as direct to retain type info for things like aggregates, for later 
perusal
+  // when translating back to LLVM/lowering in the BE. This is also why we
+  // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
+  // This will be revisited / optimised in the future.
+  return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
+}
+
 ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   if (getContext().getLangOpts().CUDAIsDevice) {
 // Coerce pointer arguments with default address space to CrossWorkGroup
@@ -78,18 +101,52 @@ ABIArgInfo 
SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
 }
 
-// Force copying aggregate type in kernel arguments by value when
-// compiling CUDA targeting SPIR-V. This is required for the object
-// copied to be valid on the device.
-// This behavior follows the CUDA spec
-// 
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
-// and matches the NVPTX implementation.
-if (isAggregateTypeForABI(Ty))
-  return getNaturalAlignIndirect(Ty, /* byval */ true);
+   if (isAggregateTypeForABI(Ty)) {
+  if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+// TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
+// currently expressible in SPIR-V; SPIR-V passes aggregates byval,
+// which the AMDGPU kernel ABI does not allow. Passing aggregates as
+// direct works around this impedance mismatch, as it retains type info
+// and can be correctly handled, post reverse-translation, by the 
AMDGPU
+// BE, which has to support this CC for legacy OpenCL purposes. It can
+// be brittle and does lead to performance degradation in certain
+// pathological cases. This will be revisited / optimised in the 
future,
+// once a way to deal with the byref/byval impedance mismatch is
+// identified.
+return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+  else
+// Force copying aggregate type in kernel arguments by value when
+// compiling CUDA targeting SPIR-V. This is required for the object
+// copied to be valid on the device.
+// This behavior follows the CUDA spec
+// 
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
+// and matches the NVPTX implementation.
+return getNaturalAlignIndirect(Ty, /* byval */ true);
+}
   }
   return classifyArgumentType(Ty);
 }
 
+ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+return DefaultABIInfo::classifyArgumentType(Ty);
+  if (!isAggregateTypeForABI(Ty))
+return DefaultABIInfo::classifyArgumentType(Ty);
+
+  // Records with non-trivial destructors/copy-constructors should not be
+  // passed by value.
+  if (auto RAA = getRecor

[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-20 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

@VyacheslavLevytskyy @michalpaszkowski any objections / thoughts on this?

https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits


@@ -64,6 +66,27 @@ void CommonSPIRABIInfo::setCCs() {
   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
 }
 
+ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+return DefaultABIInfo::classifyReturnType(RetTy);
+  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+return DefaultABIInfo::classifyReturnType(RetTy);
+
+  if (const RecordType *RT = RetTy->getAs()) {
+const RecordDecl *RD = RT->getDecl();
+if (RD->hasFlexibleArrayMember())
+  return DefaultABIInfo::classifyReturnType(RetTy);
+  }
+
+  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
+  // avoid encoding various architecture specific bits here we return 
everything
+  // as direct to retain type info for things like aggregates, for later 
perusal
+  // when translating back to LLVM/lowering in the BE. This is also why we
+  // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
+  // This will be revisited / optimised in the future.

AlexVlx wrote:

> byval doesn't really make sense on a kernel as there is no real caller, but 
> depending on how SPIRV defined its byval attribute, maybe you can codegen 
> LLVM byref into SPIRV byval?

This is a good suggestion; it's one of the things I'm looking at. It is a bit 
messy to retain the "this is actually `byref`" information (doable, but it was 
quite intrusive with tools like the translator), and I admit to not having 
fully debugged / validated it. I also just realised that the "add an extension 
to SPIR-V" bit came off as pretty passive-aggressive, for which I apologise - I 
think that might just be TheRightWayTM to deal with this, since `byref` is 
common enough to justify it.

https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits


@@ -64,6 +66,27 @@ void CommonSPIRABIInfo::setCCs() {
   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
 }
 
+ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+return DefaultABIInfo::classifyReturnType(RetTy);
+  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+return DefaultABIInfo::classifyReturnType(RetTy);
+
+  if (const RecordType *RT = RetTy->getAs()) {
+const RecordDecl *RD = RT->getDecl();
+if (RD->hasFlexibleArrayMember())
+  return DefaultABIInfo::classifyReturnType(RetTy);
+  }
+
+  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
+  // avoid encoding various architecture specific bits here we return 
everything
+  // as direct to retain type info for things like aggregates, for later 
perusal
+  // when translating back to LLVM/lowering in the BE. This is also why we
+  // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
+  // This will be revisited / optimised in the future.

AlexVlx wrote:

To wit, it's necessary pain in the end, but we've not yet figured out the 
right way to do it, since there are a few moving pieces involved. Perhaps the 
solution will end up being to add `byref` to the SPIR-V spec; in the meantime, 
this is a sufficient, if temporary and slightly icky, solution.

https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits


@@ -78,18 +101,52 @@ ABIArgInfo 
SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
 }
 
-// Force copying aggregate type in kernel arguments by value when
-// compiling CUDA targeting SPIR-V. This is required for the object
-// copied to be valid on the device.
-// This behavior follows the CUDA spec
-// 
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
-// and matches the NVPTX implementation.
-if (isAggregateTypeForABI(Ty))
-  return getNaturalAlignIndirect(Ty, /* byval */ true);
+if (isAggregateTypeForABI(Ty)) {
+  if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+// TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
+// currently expressible in SPIR-V; SPIR-V passes aggregates byval,
+// which the AMDGPU kernel ABI does not allow. Passing aggregates as
+// direct works around this impedance mismatch, as it retains type info
+// and can be correctly handled, post reverse-translation, by the 
AMDGPU
+// BE, which has to support this CC for legacy OpenCL purposes. It can
+// be brittle and does lead to performance degradation in certain
+// pathological cases. This will be revisited / optimised in the 
future,
+// once a way to deal with the byref/byval impedance mismatch is
+// identified.
+return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+  else

AlexVlx wrote:

Done, thank you.

https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits


@@ -78,18 +101,52 @@ ABIArgInfo 
SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
 }
 
-// Force copying aggregate type in kernel arguments by value when
-// compiling CUDA targeting SPIR-V. This is required for the object
-// copied to be valid on the device.
-// This behavior follows the CUDA spec
-// 
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
-// and matches the NVPTX implementation.
-if (isAggregateTypeForABI(Ty))
-  return getNaturalAlignIndirect(Ty, /* byval */ true);
+if (isAggregateTypeForABI(Ty)) {
+  if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+// TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
+// currently expressible in SPIR-V; SPIR-V passes aggregates byval,
+// which the AMDGPU kernel ABI does not allow. Passing aggregates as
+// direct works around this impedance mismatch, as it retains type info
+// and can be correctly handled, post reverse-translation, by the 
AMDGPU
+// BE, which has to support this CC for legacy OpenCL purposes. It can
+// be brittle and does lead to performance degradation in certain
+// pathological cases. This will be revisited / optimised in the 
future,
+// once a way to deal with the byref/byval impedance mismatch is

AlexVlx wrote:

That's fine; when it amounts to something, we (well, I) will just fix it here.

https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits


@@ -64,6 +66,27 @@ void CommonSPIRABIInfo::setCCs() {
   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
 }
 
+ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+return DefaultABIInfo::classifyReturnType(RetTy);
+  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+return DefaultABIInfo::classifyReturnType(RetTy);
+
+  if (const RecordType *RT = RetTy->getAs()) {
+const RecordDecl *RD = RT->getDecl();
+if (RD->hasFlexibleArrayMember())
+  return DefaultABIInfo::classifyReturnType(RetTy);
+  }
+
+  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
+  // avoid encoding various architecture specific bits here we return 
everything
+  // as direct to retain type info for things like aggregates, for later 
perusal
+  // when translating back to LLVM/lowering in the BE. This is also why we
+  // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
+  // This will be revisited / optimised in the future.

AlexVlx wrote:

I think you misunderstand the core problem, which is orthogonal to LLVM: the 
SPIR-V IR specification does not [encode byref, only 
byval](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_function_parameter_attribute).
Whatever you might feel about what is right or wrong is somewhat unhelpful 
here, unless you are going to add an extension to that end; trying to backdoor 
this is just a lot of pain.

https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx commented:

Gentle ping.

https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-19 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-13 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-13 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-13 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx commented:

> > but empirically robust and guaranteed to work as the AMDGPU BE retains 
> > handling of direct passing for legacy reasons.

> I would like to get rid of that someday... 

I share the sentiment, but as far as I can see this is one of those things 
that has been on its way out for years, so it's not going to suddenly 
disappear. At worst, since tests cover it, if the BE goes and disallows direct 
passing to a kernel (for example), the tests will flare up and we'll have to 
fix it on the AMDGCNSPIRV end.

https://github.com/llvm/llvm-project/pull/102776


[clang] [Clang] Add `__CLANG_GPU_DISABLE_MATH_WRAPPERS` macro for offloading math (PR #98234)

2024-08-13 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx approved this pull request.


https://github.com/llvm/llvm-project/pull/98234


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-10 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-10 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx ready_for_review 
https://github.com/llvm/llvm-project/pull/102776


[clang] [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (PR #102776)

2024-08-10 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx created 
https://github.com/llvm/llvm-project/pull/102776

The AMDGPU kernel ABI is not directly representable in SPIR-V, since it relies 
on passing aggregates `byref`, and SPIR-V only encodes `byval` (which the 
AMDGPU BE disallows for kernel arguments). As a temporary solution to this 
mismatch, we add special handling for AMDGCN flavoured SPIR-V, whereby 
aggregates are passed as direct, both to kernels and to normal functions. This 
is not ideal (there are pathological cases where performance is heavily 
impacted), but empirically robust and guaranteed to work as the AMDGPU BE 
retains handling of `direct` passing for legacy reasons.

We will revisit this in the future, but as it stands it is enough to pass a 
wide array of integration tests and generates correct SPIR-V and correct 
reverse translation into LLVM IR. The amdgpu-kernel-arg-pointer-type test is 
updated via the automated script, and thus becomes quite noisy.
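
As a rough source-level illustration (a hypothetical HIP example, not taken
from the patch), the comments below describe how each ABI discussed here would
treat the same aggregate kernel argument; the struct and kernel names are
illustrative only.

```cpp
// Hypothetical HIP kernel taking an aggregate by value.
struct Payload {
  float data[16];
};

__global__ void consume(Payload p) {
  // AMDGPU (amdgcn) kernel ABI:  the aggregate is passed byref, which has no
  //                              SPIR-V function-parameter equivalent.
  // Generic SPIR-V (CUDA path):  the aggregate is passed byval, matching the
  //                              NVPTX behaviour referenced above.
  // AMDGCN-flavoured SPIR-V:     the aggregate is passed direct (as a value),
  //                              preserving type info so the AMDGPU BE can
  //                              handle it after reverse translation.
  (void)p.data[0];
}
```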


[clang] [llvm] [clang][CodeGen][AMDGPU] Enable AMDGPU `printf` for `spirv64-amd-amdhsa` (PR #97132)

2024-07-05 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> LLVM Buildbot has detected a new failure on builder `sanitizer-ppc64le-linux` 
> running on `ppc64le-sanitizer` while building `clang,llvm` at step 2 
> "annotate".
> 
> Full details are available at: 
> https://lab.llvm.org/buildbot/#/builders/72/builds/839
> 
> Here is the relevant piece of the build log for the reference:
> 
> ```
> Step 2 (annotate) failure: 'python 
> ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py'
>  (failure)
> ...
> XFAIL: SanitizerCommon-msan-powerpc64le-Linux :: Posix/dump_registers.cpp 
> (365 of 2450)
> PASS: ThreadSanitizer-powerpc64le :: bench_ten_mutexes.cpp (366 of 2450)
> PASS: SanitizerCommon-tsan-powerpc64le-Linux :: Posix/lstat.cpp (367 of 2450)
> PASS: MemorySanitizer-POWERPC64LE :: Linux/glob_altdirfunc.cpp (368 of 2450)
> PASS: ThreadSanitizer-powerpc64le :: atomic_hle.cpp (369 of 2450)
> PASS: LeakSanitizer-Standalone-powerpc64le :: TestCases/Linux/dso-unknown.cpp 
> (370 of 2450)
> PASS: ThreadSanitizer-powerpc64le :: print_full_thread_history.cpp (371 of 
> 2450)
> PASS: LeakSanitizer-Standalone-powerpc64le :: TestCases/disabler.cpp (372 of 
> 2450)
> PASS: SanitizerCommon-tsan-powerpc64le-Linux :: Linux/ptrace.cpp (373 of 2450)
> PASS: ScudoStandalone-Unit :: ./ScudoCUnitTest-powerpc64le-Test/13/14 (374 of 
> 2450)
> FAIL: ThreadSanitizer-powerpc64le :: signal_block.cpp (375 of 2450)
>  TEST 'ThreadSanitizer-powerpc64le :: signal_block.cpp' 
> FAILED 
> Exit Code: 1
> 
> Command Output (stderr):
> --
> RUN: at line 1: 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/./bin/clang
>   -fsanitize=thread -Wall  -m64 -fno-function-sections   -gline-tables-only 
> -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/../
>  -O1 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
>  -o 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
>  &&  
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
>  2>&1 | FileCheck 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
> + 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/./bin/clang
>  -fsanitize=thread -Wall -m64 -fno-function-sections -gline-tables-only 
> -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/../
>  -O1 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
>  -o 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
> + 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
> + FileCheck 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:59:15:
>  error: CHECK-NOT: excluded string found in input
> // CHECK-NOT: WARNING: ThreadSanitizer:
>   ^
> :2:1: note: found here
> WARNING: ThreadSanitizer: signal handler spoils errno (pid=3242989)
> ^
> 
> Input file: 
> Check file: 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
> 
> -dump-input=help explains the following input dump.
> 
> Input was:
> <<
> 1: == 
> 2: WARNING: ThreadSanitizer: signal handler spoils errno 
> (pid=3242989) 
> not:59 !  
>   error: no match expected
> 3:  Signal 10 handler invoked at: 
> 4:  #0 handler(int) 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:13
>  (signal_block.cpp.tmp+0xfea60) 
> 5:  
> 6: SUMMARY: ThreadSanitizer: signal handler spoils errno 
> /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/c

[clang] [llvm] [clang][CodeGen][AMDGPU] Enable AMDGPU `printf` for `spirv64-amd-amdhsa` (PR #97132)

2024-07-05 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx closed 
https://github.com/llvm/llvm-project/pull/97132
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][CodeGen][AMDGPU] Enable AMDGPU `printf` for `spirv64-amd-amdhsa` (PR #97132)

2024-07-01 Thread Alex Voicu via cfe-commits


@@ -5888,12 +5888,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const 
GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_printf:
   case Builtin::BIprintf:
 if (getTarget().getTriple().isNVPTX() ||
-getTarget().getTriple().isAMDGCN()) {
+getTarget().getTriple().isAMDGCN() ||
+(getTarget().getTriple().isSPIRV() &&
+ getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
   if (getLangOpts().OpenMPIsTargetDevice)
 return EmitOpenMPDevicePrintfCallExpr(E);
   if (getTarget().getTriple().isNVPTX())
 return EmitNVPTXDevicePrintfCallExpr(E);
-  if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
+  if ((getTarget().getTriple().isAMDGCN() ||
+   getTarget().getTriple().isSPIRV()) &&
+  getLangOpts().HIP)

AlexVlx wrote:

OMP seems to have no triple logic here and just unconditionally forwards to its 
`printf` impl on the device path, so I'm not entirely sure what the question is 
probing. If you're asking why we don't unconditionally forward in HIP when 
compiling for the device, I don't quite know; I'm merely re-using what was 
already there. If you're asking why I only check for SPIR-V rather than 
re-checking the vendor type, it's because that would be redundant given the 
outer check. If it's none of those, could you please clarify what you mean?
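
For reference, a minimal sketch of the dispatch this hunk ends up implementing; 
the `isAMDLikeTarget` helper below is purely illustrative and not part of the 
actual CodeGen sources:

```
#include "llvm/TargetParser/Triple.h"

// Illustrative only; CodeGen writes this condition inline in EmitBuiltinExpr.
static bool isAMDLikeTarget(const llvm::Triple &T) {
  return T.isAMDGCN() ||
         (T.isSPIRV() && T.getVendor() == llvm::Triple::VendorType::AMD);
}

// Dispatch order for BIprintf / BI__builtin_printf after the patch:
//   NVPTX or AMD-like target?
//     - OpenMP target device -> EmitOpenMPDevicePrintfCallExpr
//     - NVPTX                -> EmitNVPTXDevicePrintfCallExpr
//     - AMD-like and HIP     -> the AMDGPU device printf path
```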

https://github.com/llvm/llvm-project/pull/97132
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-28 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx commented:

@jrtc27 @arsenm any additional comments? Are things more palatable in this 
form? Should this be turned into an RFC? Thanks.

https://github.com/llvm/llvm-project/pull/95728
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-28 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/95728
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-28 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/95728

>From 2b500ad9ef2baf27da29146b5a4123dcb75e Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 02:15:00 +0100
Subject: [PATCH 1/3] Add interface for exposing a target's flat address space,
 if it exists.

---
 clang/include/clang/Basic/TargetInfo.h | 7 +++
 clang/lib/Basic/Targets/AMDGPU.h   | 6 ++
 clang/lib/Basic/Targets/SPIR.h | 4 
 3 files changed, 17 insertions(+)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8a6511b9ced83..8841ec5f910d9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.
+  virtual unsigned getFlatPtrAddressSpace() const {
+return 0;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 94d9ba93ed226..d06c7d58fe94c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -379,6 +379,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 return static_cast(llvm::AMDGPUAS::CONSTANT_ADDRESS);
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces.
+  unsigned getFlatPtrAddressSpace() const override {
+return static_cast(llvm::AMDGPUAS::FLAT_ADDRESS);
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac..14d235bace960 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -182,6 +182,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public 
TargetInfo {
 return TargetInfo::VoidPtrBuiltinVaList;
   }
 
+  unsigned getFlatPtrAddressSpace() const override {
+return 4u; // 4 is generic i.e. flat for SPIR & SPIR-V.
+  }
+
   std::optional
   getDWARFAddressSpace(unsigned AddressSpace) const override {
 return AddressSpace;

>From 346877d118700a748bffa8b0aee8633d6991582c Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 12:58:04 +0100
Subject: [PATCH 2/3] Fix formatting.

---
 clang/include/clang/Basic/TargetInfo.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8841ec5f910d9..a3b60db122cae 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1767,9 +1767,7 @@ class TargetInfo : public TransferrableTargetInfo,
   /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
   /// can be casted to / from all other target address spaces. If the target
   /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const {
-return 0;
-  }
+  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
 
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the

>From 8ee059a30fbad6be374a980447601339eea37645 Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Wed, 19 Jun 2024 14:44:53 +0100
Subject: [PATCH 3/3] Rework interface to return optional; flesh out comment.

---
 clang/include/clang/Basic/TargetInfo.h | 14 ++
 clang/lib/Basic/Targets/AMDGPU.h   |  2 +-
 clang/lib/Basic/Targets/SPIR.h |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index a3b60db122cae..eb5b29dfbb3f0 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,10 +1764,16 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
-  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
-  /// can be casted to / from all other target address spaces. If the target
-  /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
+  /// \returns If a target exposes a flat ptr address space (a flat ptr is a 
ptr
+  /// that can be caste

[clang] [NFC] [clang][SPIR-V] Use AMDGPU prefix to avoid confusion (PR #96962)

2024-06-28 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx approved this pull request.

LGTM, thanks! Also, apologies for any inconvenience caused.

https://github.com/llvm/llvm-project/pull/96962
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][docs] Add preliminary documentation for SPIR-V support in the HIPAMD ToolChain (PR #96657)

2024-06-28 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx closed 
https://github.com/llvm/llvm-project/pull/96657
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [clang][SPIR-V] Use AMDGPU prefix to avoid confusion (PR #96962)

2024-06-27 Thread Alex Voicu via cfe-commits


@@ -270,5 +270,5 @@
 // VE: target datalayout = 
"e-m:e-i64:64-n32:64-S128-v64:64:64-v128:64:64-v256:64:64-v512:64:64-v1024:64:64-v2048:64:64-v4096:64:64-v8192:64:64-v16384:64:64"
 
 // RUN: %clang_cc1 -triple spirv64-amd -o - -emit-llvm %s | \
-// RUN: FileCheck %s -check-prefix=SPIR64
-// AMDGPUSPIRV64: target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"
+// RUN: FileCheck %s -check-prefix=AMDGPUSPIRV64
+// AMDGPUSPIRV64: target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"

AlexVlx wrote:

Sure, I can do that. The issue with the test is that we pick between plain 
SPIR-V and AMDGCN-flavoured SPIR-V based on the OS, not on the vendor (please 
see ), so the fix here is to use the `spirv64-amd-amdhsa` triple for the 
AMDGPUSPIRV64 case. I can do that in this PR or spin up a new one, whichever 
you prefer.
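
To make the OS-vs-vendor point concrete, a small sketch (assuming the current 
selection logic; the function name is mine):

```
#include "llvm/TargetParser/Triple.h"

// "spirv64-amd" leaves the OS unknown, so it picks the plain SPIR-V target;
// only the full "spirv64-amd-amdhsa" triple (OS == AMDHSA) picks the
// AMDGCN-flavoured SPIR-V target.
bool isAMDGCNFlavouredSPIRV(const llvm::Triple &T) {
  return T.getArch() == llvm::Triple::spirv64 &&
         T.getOS() == llvm::Triple::AMDHSA;
}
```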

https://github.com/llvm/llvm-project/pull/96962
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [clang][SPIR-V] Use AMDGPU prefix to avoid confusion (PR #96962)

2024-06-27 Thread Alex Voicu via cfe-commits


@@ -270,5 +270,5 @@
 // VE: target datalayout = 
"e-m:e-i64:64-n32:64-S128-v64:64:64-v128:64:64-v256:64:64-v512:64:64-v1024:64:64-v2048:64:64-v4096:64:64-v8192:64:64-v16384:64:64"
 
 // RUN: %clang_cc1 -triple spirv64-amd -o - -emit-llvm %s | \
-// RUN: FileCheck %s -check-prefix=SPIR64
-// AMDGPUSPIRV64: target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"
+// RUN: FileCheck %s -check-prefix=AMDGPUSPIRV64
+// AMDGPUSPIRV64: target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"

AlexVlx wrote:

This change is incorrect: the DataLayout differs, and we do encode both the 
Program AS and the alloca AS (please see ), so please don't do this.
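
For reference, the difference is visible directly in the DataLayout strings; a 
small illustrative check (the function name is mine):

```
#include "llvm/IR/DataLayout.h"

void checkAMDGCNSPIRVLayout() {
  // The AMDGCN-flavoured SPIR-V layout carries "-P4-A0": program AS 4 and
  // alloca AS 0, which the replacement string in the edited test drops.
  llvm::DataLayout DL("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-"
                      "v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
  unsigned ProgramAS = DL.getProgramAddressSpace(); // 4
  unsigned AllocaAS = DL.getAllocaAddrSpace();      // 0
  (void)ProgramAS;
  (void)AllocaAS;
}
```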

https://github.com/llvm/llvm-project/pull/96962
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [clang][SPIR-V] Use AMDGPU prefix to avoid confusion (PR #96962)

2024-06-27 Thread Alex Voicu via cfe-commits


@@ -270,5 +270,5 @@
 // VE: target datalayout = 
"e-m:e-i64:64-n32:64-S128-v64:64:64-v128:64:64-v256:64:64-v512:64:64-v1024:64:64-v2048:64:64-v4096:64:64-v8192:64:64-v16384:64:64"
 
 // RUN: %clang_cc1 -triple spirv64-amd -o - -emit-llvm %s | \
-// RUN: FileCheck %s -check-prefix=SPIR64
-// AMDGPUSPIRV64: target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"
+// RUN: FileCheck %s -check-prefix=AMDGPUSPIRV64

AlexVlx wrote:

Thank you for fixing the typo, much appreciated.

https://github.com/llvm/llvm-project/pull/96962
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [clang][SPIR-V] Use AMDGPU prefix to avoid confusion (PR #96962)

2024-06-27 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/96962
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [clang][SPIR-V] Use AMDGPU prefix to avoid confusion (PR #96962)

2024-06-27 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx requested changes to this pull request.


https://github.com/llvm/llvm-project/pull/96962
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-27 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/95728

>From 2b500ad9ef2baf27da29146b5a4123dcb75e Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 02:15:00 +0100
Subject: [PATCH 1/3] Add interface for exposing a target's flat address space,
 if it exists.

---
 clang/include/clang/Basic/TargetInfo.h | 7 +++
 clang/lib/Basic/Targets/AMDGPU.h   | 6 ++
 clang/lib/Basic/Targets/SPIR.h | 4 
 3 files changed, 17 insertions(+)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8a6511b9ced83..8841ec5f910d9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.
+  virtual unsigned getFlatPtrAddressSpace() const {
+return 0;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 94d9ba93ed226..d06c7d58fe94c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -379,6 +379,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 return static_cast(llvm::AMDGPUAS::CONSTANT_ADDRESS);
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces.
+  unsigned getFlatPtrAddressSpace() const override {
+return static_cast(llvm::AMDGPUAS::FLAT_ADDRESS);
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac..14d235bace960 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -182,6 +182,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public 
TargetInfo {
 return TargetInfo::VoidPtrBuiltinVaList;
   }
 
+  unsigned getFlatPtrAddressSpace() const override {
+return 4u; // 4 is generic i.e. flat for SPIR & SPIR-V.
+  }
+
   std::optional
   getDWARFAddressSpace(unsigned AddressSpace) const override {
 return AddressSpace;

>From 346877d118700a748bffa8b0aee8633d6991582c Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 12:58:04 +0100
Subject: [PATCH 2/3] Fix formatting.

---
 clang/include/clang/Basic/TargetInfo.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8841ec5f910d9..a3b60db122cae 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1767,9 +1767,7 @@ class TargetInfo : public TransferrableTargetInfo,
   /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
   /// can be casted to / from all other target address spaces. If the target
   /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const {
-return 0;
-  }
+  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
 
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the

>From 8ee059a30fbad6be374a980447601339eea37645 Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Wed, 19 Jun 2024 14:44:53 +0100
Subject: [PATCH 3/3] Rework interface to return optional; flesh out comment.

---
 clang/include/clang/Basic/TargetInfo.h | 14 ++
 clang/lib/Basic/Targets/AMDGPU.h   |  2 +-
 clang/lib/Basic/Targets/SPIR.h |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index a3b60db122cae..eb5b29dfbb3f0 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,10 +1764,16 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
-  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
-  /// can be casted to / from all other target address spaces. If the target
-  /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
+  /// \returns If a target exposes a flat ptr address space (a flat ptr is a 
ptr
+  /// that can be caste

[clang] [llvm] [clang][docs] Add preliminary documentation for SPIR-V support in the HIPAMD ToolChain (PR #96657)

2024-06-26 Thread Alex Voicu via cfe-commits


@@ -284,3 +284,48 @@ Example Usage
   Base* basePtr = &obj;
   basePtr->virtualFunction(); // Allowed since obj is constructed in 
device code
}
+
+SPIR-V Support on HIPAMD ToolChain
+==
+
+The HIPAMD ToolChain supports targetting
+`AMDGCN Flavoured SPIR-V 
`_.
+The support for SPIR-V in the ROCm and HIPAMD ToolChain is under active
+development.
+
+Compilation Process
+---
+
+When compiling HIP programs with the intent of utilizing SPIR-V, the process
+diverges from the traditional compilation flow:
+
+Using ``--offload-arch=amdgcnspirv``
+
+
+- **Target Triple**: The ``--offload-arch=amdgcnspirv`` flag instructs the
+  compiler to use the target triple ``spirv64-amd-amdhsa``. This approach does
+  generates generic AMDGCN SPIR-V which retains architecture specific elements
+  without hardcoding them, thus allowing for optimal target specific code to be
+  generated at run time, when the concrete target is known.
+
+- **LLVM IR Translation**: The program is compiled to LLVM Intermediate
+  Representation (IR), which is subsequently translated into SPIR-V. In the
+  future, this translation step will be replaced by direct SPIR-V emission via
+  the SPIR-V Back-end.
+
+- **Clang Offload Bundler**: The resulting SPIR-V is embedded in the Clang
+  offload bundler with the bundle ID ``hipv4-hip-spirv64-amd-amdhsa-generic``.

AlexVlx wrote:

Fixed, thank you!

https://github.com/llvm/llvm-project/pull/96657
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][docs] Add preliminary documentation for SPIR-V support in the HIPAMD ToolChain (PR #96657)

2024-06-26 Thread Alex Voicu via cfe-commits


@@ -284,3 +284,48 @@ Example Usage
   Base* basePtr = &obj;
   basePtr->virtualFunction(); // Allowed since obj is constructed in 
device code
}
+
+SPIR-V Support on HIPAMD ToolChain
+==
+
+The HIPAMD ToolChain supports targetting
+`AMDGCN Flavoured SPIR-V 
`_.
+The support for SPIR-V in the ROCm and HIPAMD ToolChain is under active
+development.
+
+Compilation Process
+---
+
+When compiling HIP programs with the intent of utilizing SPIR-V, the process
+diverges from the traditional compilation flow:
+
+Using ``--offload-arch=amdgcnspirv``
+
+
+- **Target Triple**: The ``--offload-arch=amdgcnspirv`` flag instructs the
+  compiler to use the target triple ``spirv64-amd-amdhsa``. This approach does
+  generates generic AMDGCN SPIR-V which retains architecture specific elements
+  without hardcoding them, thus allowing for optimal target specific code to be
+  generated at run time, when the concrete target is known.
+
+- **LLVM IR Translation**: The program is compiled to LLVM Intermediate
+  Representation (IR), which is subsequently translated into SPIR-V. In the
+  future, this translation step will be replaced by direct SPIR-V emission via
+  the SPIR-V Back-end.
+
+- **Clang Offload Bundler**: The resulting SPIR-V is embedded in the Clang
+  offload bundler with the bundle ID ``hipv4-hip-spirv64-amd-amdhsa-generic``.
+
+Mixed with Normal ``--offload-arch``
+
+
+**Mixing ``amdgcnspirv`` and concrete ``gfx###`` targets via ``--offload-arch``

AlexVlx wrote:

@yxsamliu is correct, the HIPSPIRV toolchain is matched to a different 
downstream ecosystem.

https://github.com/llvm/llvm-project/pull/96657
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][docs] Add preliminary documentation for SPIR-V support in the HIPAMD ToolChain (PR #96657)

2024-06-25 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> > > I'll need to play with this with my driver code. I'm guessing it's 
> > > because it needs to generate an entirely separate toolchain? The OpenMP 
> > > path basically does that by inferring the toolchain from the string 
> > > value, so we can support `--offload-arch=sm_89,gfx90a` for example.
> > 
> > 
> > Not quite, it's more because we'd have to nest two triples 
> > (`spirv64-amd-amdhsa` && `amdgcn-amd-amdhsa`) within the same toolchain, 
> > since we're using the same HIPAMD ToolChain. It's fixable, just slightly 
> > faffy to do without spamming toolchains / within the same toolchain.
> 
> Honestly I'm wondering if it would be cleaner to just make a 
> `HIPSPIRVToolChain` since we may want special handling in the future.

I looked at this and it doesn't look super appealing (it turned into something 
of a rabbit hole): it'd duplicate a lot of the existing toolchain, and it would 
also try to squat in an already overcrowded space (there's already HIPSPV).

https://github.com/llvm/llvm-project/pull/96657
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][docs] Add preliminary documentation for SPIR-V support in the HIPAMD ToolChain (PR #96657)

2024-06-25 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/96657
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][docs] Add preliminary documentation for SPIR-V support in the HIPAMD ToolChain (PR #96657)

2024-06-25 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx commented:

> I'll need to play with this with my driver code. I'm guessing it's because it 
> needs to generate an entirely separate toolchain? The OpenMP path basically 
> does that by inferring the toolchain from the string value, so we can support 
> `--offload-arch=sm_89,gfx90a` for example. 

Not quite, it's more because we'd have to nest two triples 
(`spirv64-amd-amdhsa` && `amdgcn-amd-amdhsa`) within the same toolchain, since 
we're using the same HIPAMD ToolChain. It's fixable, just slightly faffy to do 
without spamming toolchains / within the same toolchain.

https://github.com/llvm/llvm-project/pull/96657
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-25 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx closed 
https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-24 Thread Alex Voicu via cfe-commits


@@ -147,6 +147,14 @@ getNVIDIAOffloadTargetTriple(const Driver &D, const 
ArgList &Args,
 static std::optional
 getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
   if (!Args.hasArg(options::OPT_offload_EQ)) {
+auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ);
+if (llvm::find(OffloadArchs, "amdgcnspirv") != OffloadArchs.cend()) {
+  if (OffloadArchs.size() == 1)
+return llvm::Triple("spirv64-amd-amdhsa");

AlexVlx wrote:

Thanks for the pointer, much appreciated! I will revisit this/take your 
suggestion, but for the initial experimental support it was deemed acceptable 
to carry this temporary limitation (unless you strongly object, of course).

https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-21 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-19 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/95728

>From 2b500ad9ef2baf27da29146b5a4123dcb75e Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 02:15:00 +0100
Subject: [PATCH 1/3] Add interface for exposing a target's flat address space,
 if it exists.

---
 clang/include/clang/Basic/TargetInfo.h | 7 +++
 clang/lib/Basic/Targets/AMDGPU.h   | 6 ++
 clang/lib/Basic/Targets/SPIR.h | 4 
 3 files changed, 17 insertions(+)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8a6511b9ced83..8841ec5f910d9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.
+  virtual unsigned getFlatPtrAddressSpace() const {
+return 0;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 94d9ba93ed226..d06c7d58fe94c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -379,6 +379,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 return static_cast(llvm::AMDGPUAS::CONSTANT_ADDRESS);
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces.
+  unsigned getFlatPtrAddressSpace() const override {
+return static_cast(llvm::AMDGPUAS::FLAT_ADDRESS);
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac..14d235bace960 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -182,6 +182,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public 
TargetInfo {
 return TargetInfo::VoidPtrBuiltinVaList;
   }
 
+  unsigned getFlatPtrAddressSpace() const override {
+return 4u; // 4 is generic i.e. flat for SPIR & SPIR-V.
+  }
+
   std::optional
   getDWARFAddressSpace(unsigned AddressSpace) const override {
 return AddressSpace;

>From 346877d118700a748bffa8b0aee8633d6991582c Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 12:58:04 +0100
Subject: [PATCH 2/3] Fix formatting.

---
 clang/include/clang/Basic/TargetInfo.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8841ec5f910d9..a3b60db122cae 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1767,9 +1767,7 @@ class TargetInfo : public TransferrableTargetInfo,
   /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
   /// can be casted to / from all other target address spaces. If the target
   /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const {
-return 0;
-  }
+  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
 
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the

>From 8ee059a30fbad6be374a980447601339eea37645 Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Wed, 19 Jun 2024 14:44:53 +0100
Subject: [PATCH 3/3] Rework interface to return optional; flesh out comment.

---
 clang/include/clang/Basic/TargetInfo.h | 14 ++
 clang/lib/Basic/Targets/AMDGPU.h   |  2 +-
 clang/lib/Basic/Targets/SPIR.h |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index a3b60db122cae..eb5b29dfbb3f0 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,10 +1764,16 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
-  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
-  /// can be casted to / from all other target address spaces. If the target
-  /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
+  /// \returns If a target exposes a flat ptr address space (a flat ptr is a 
ptr
+  /// that can be caste

[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-19 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/95728

>From 2b500ad9ef2baf27da29146b5a4123dcb75e Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 02:15:00 +0100
Subject: [PATCH 1/2] Add interface for exposing a target's flat address space,
 if it exists.

---
 clang/include/clang/Basic/TargetInfo.h | 7 +++
 clang/lib/Basic/Targets/AMDGPU.h   | 6 ++
 clang/lib/Basic/Targets/SPIR.h | 4 
 3 files changed, 17 insertions(+)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8a6511b9ced83..8841ec5f910d9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.
+  virtual unsigned getFlatPtrAddressSpace() const {
+return 0;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 94d9ba93ed226..d06c7d58fe94c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -379,6 +379,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 return static_cast(llvm::AMDGPUAS::CONSTANT_ADDRESS);
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces.
+  unsigned getFlatPtrAddressSpace() const override {
+return static_cast(llvm::AMDGPUAS::FLAT_ADDRESS);
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac..14d235bace960 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -182,6 +182,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public 
TargetInfo {
 return TargetInfo::VoidPtrBuiltinVaList;
   }
 
+  unsigned getFlatPtrAddressSpace() const override {
+return 4u; // 4 is generic i.e. flat for SPIR & SPIR-V.
+  }
+
   std::optional
   getDWARFAddressSpace(unsigned AddressSpace) const override {
 return AddressSpace;

>From 346877d118700a748bffa8b0aee8633d6991582c Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 12:58:04 +0100
Subject: [PATCH 2/2] Fix formatting.

---
 clang/include/clang/Basic/TargetInfo.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8841ec5f910d9..a3b60db122cae 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1767,9 +1767,7 @@ class TargetInfo : public TransferrableTargetInfo,
   /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
   /// can be casted to / from all other target address spaces. If the target
   /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const {
-return 0;
-  }
+  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
 
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-18 Thread Alex Voicu via cfe-commits


@@ -907,7 +907,8 @@ void CodeGenModule::Release() {
   if (Context.getTargetInfo().getTriple().isWasm())
 EmitMainVoidAlias();
 
-  if (getTriple().isAMDGPU()) {
+  if (getTriple().isAMDGPU() ||
+  (getTriple().isSPIRV() && getTriple().getVendor() == llvm::Triple::AMD)) 
{

AlexVlx wrote:

> I didn't know the vendor got used for anything.

This matches how we documented it when we added AMDGCN-flavoured SPIR-V, and it 
seemed to reflect the idea that this is SPIR-V(64) with customisations for AMD 
as the vendor; do you think it is, or could become, problematic?

https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-18 Thread Alex Voicu via cfe-commits


@@ -907,7 +907,8 @@ void CodeGenModule::Release() {
   if (Context.getTargetInfo().getTriple().isWasm())
 EmitMainVoidAlias();
 
-  if (getTriple().isAMDGPU()) {
+  if (getTriple().isAMDGPU() ||
+  (getTriple().isSPIRV() && getTriple().getVendor() == llvm::Triple::AMD)) 
{

AlexVlx wrote:

> I'm wondering if we should add `isAMD` to `llvm::Triple` or something.

We do have `isAMDGCN`, but that reflects the Arch. I guess it might be 
convenient sugar to have a predicate on vendors as well? I don't find the 
manual check excessively offensive FWIW.
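
Purely as a sketch of what such sugar might look like (no `isAMD`-style 
predicate exists on `llvm::Triple` today; the name below is hypothetical):

```
#include "llvm/TargetParser/Triple.h"

// Hypothetical convenience wrapper; it just spells out the manual check from
// the hunk above.
bool isAMDVendoredSPIRV(const llvm::Triple &T) {
  return T.isSPIRV() && T.getVendor() == llvm::Triple::AMD;
}
```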



https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-17 Thread Alex Voicu via cfe-commits


@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.

AlexVlx wrote:

> > here's no such thing as LangAS::Generic, there's LangAS::Default.
> 
> Right, so query the target address space map for that.
> 
Ok, I queried LangAS::Default and I got 
[this](https://github.com/llvm/llvm-project/blob/08de0b3cf54e4998799673f835e9a7d5ead8efab/clang/lib/Basic/Targets/AMDGPU.cpp#L65)
 because of 
[this](https://github.com/llvm/llvm-project/blob/08de0b3cf54e4998799673f835e9a7d5ead8efab/clang/lib/Basic/Targets/AMDGPU.cpp#L256).
 What's next?

> The IR does not have the concept of a generic, flat, or no address space. 0 
> is the "default address space", which has its own specific properties.
> 

What are these specific properties and where are they documented? Also, 0 
doesn't quite look like the default address space, at least not de jure, see 
above; it's not even the default address space for all uses of AMDGPU.

> > this merely maintains the status quo, wherein everybody grabs an AS 0 
> > pointer and hopes for the best.
> 
> For the most part this isn't true, at least in llvm proper.
>

Sure, but this is in Clang, which pretty liberally uses things like 
`getUnqualPtrTy` and hopes it will work. Elsewhere you are essentially arguing 
for exactly the same thing, namely that the FE should just use 0 
unconditionally. I am saying that is wrong in general, and the next best thing 
is to query the target in case it has an opinion about which AS should be used 
everywhere (just as we already ask it about something far more niche, namely 
vtable ptrs), and only then revert to the status quo, which is to use 0. In 
Clang. Because otherwise your request for the FE to "just do the right thing" 
ends up unsolvable.

https://github.com/llvm/llvm-project/pull/95728
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-17 Thread Alex Voicu via cfe-commits


@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.

AlexVlx wrote:

> Some targets won't even have a flat address space. But that's fine, because 
> in these specific uses the address space's underlying 
> range/representation/meaning/whatever is completely irrelevant, all that 
> matters is the GlobalObject (GlobalValue?) you get once you strip away an 
> addrspacecasts. So I object to this on several grounds:
> 
> 1. A flat address space may not exist
> 2. These uses don't care about the address space beyond being consistent 
> across files
> 3. The existence of this API invites people to start using it for other 
> things, and relying on properties of it that you've documented as being true 
> which I don't think we want to be guaranteeing
> 
> If you really object to the arbitrary address space in use, I would rather 
> see the special arrays be made overloaded/polymorphic instead so that the 
> addrspacecasts can be avoided entirely .

@jrtc27 this is not purely bespoke for the special arrays (sadly). The core 
issue is the prevalent, unguarded/unassuming use of AS 0.  Since targets are 
not really bound to do anything special with 0 (there are no words anywhere 
saying 0 has any specific properties), problematic / invalid situations arise. 
This leads to a perpetual game of whack-a-mole as e.g. one brings up a more 
general purpose language, which is not quite ideal.

Strictly regarding the special-purpose arrays, I object to the arbitrary 
address space because it actually breaks targets as things stand today. For 
example, SPIR-V uses 0 for private, so if we just use 0 we end up trying to 
generate invalid SPIR-V that casts from global (CrossWorkgroup) to private 
(Function) when populating them. Yes, we can then go and add special handling 
just for the special arrays etc. in the SPIR-V BE / the downstream Translator, 
but that seems less than ideal to me.

I'd like to address (1) and (3): I did worry about those, but we already have 
an interface that returns a vtbl ptr address space, and it was not clear why 0, 
which on some targets is a limited-visibility, limited-capacity, unambiguously 
non-global private AS, would be valid for a vtable. Instead of unconditionally 
returning an integer, we could return an `optional`, with the default being 
`std::nullopt`. This would constrain uses to folks who know what they want, and 
why they want it, as opposed to being a trivial drop-in replacement.
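
A minimal sketch of the `optional`-returning shape suggested here (the class 
and method names are illustrative; the committed interface may differ):

```
#include <optional>

struct TargetInfoSketch {
  virtual ~TargetInfoSketch() = default;
  // Default: the target exposes no flat address space / does not care.
  virtual std::optional<unsigned> getFlatPtrAddressSpace() const {
    return std::nullopt;
  }
};

struct AMDGPULikeTarget final : TargetInfoSketch {
  std::optional<unsigned> getFlatPtrAddressSpace() const override {
    return 0u; // stand-in for llvm::AMDGPUAS::FLAT_ADDRESS
  }
};
```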

https://github.com/llvm/llvm-project/pull/95728
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-17 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx commented:

Gentle ping.

https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-17 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-17 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/95728

>From 2b500ad9ef2baf27da29146b5a4123dcb75e Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 02:15:00 +0100
Subject: [PATCH 1/2] Add interface for exposing a target's flat address space,
 if it exists.

---
 clang/include/clang/Basic/TargetInfo.h | 7 +++
 clang/lib/Basic/Targets/AMDGPU.h   | 6 ++
 clang/lib/Basic/Targets/SPIR.h | 4 
 3 files changed, 17 insertions(+)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8a6511b9ced83..8841ec5f910d9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.
+  virtual unsigned getFlatPtrAddressSpace() const {
+return 0;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 94d9ba93ed226..d06c7d58fe94c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -379,6 +379,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 return static_cast(llvm::AMDGPUAS::CONSTANT_ADDRESS);
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces.
+  unsigned getFlatPtrAddressSpace() const override {
+return static_cast(llvm::AMDGPUAS::FLAT_ADDRESS);
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac..14d235bace960 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -182,6 +182,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public 
TargetInfo {
 return TargetInfo::VoidPtrBuiltinVaList;
   }
 
+  unsigned getFlatPtrAddressSpace() const override {
+return 4u; // 4 is generic i.e. flat for SPIR & SPIR-V.
+  }
+
   std::optional
   getDWARFAddressSpace(unsigned AddressSpace) const override {
 return AddressSpace;

>From 346877d118700a748bffa8b0aee8633d6991582c Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 12:58:04 +0100
Subject: [PATCH 2/2] Fix formatting.

---
 clang/include/clang/Basic/TargetInfo.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8841ec5f910d9..a3b60db122cae 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1767,9 +1767,7 @@ class TargetInfo : public TransferrableTargetInfo,
   /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
   /// can be casted to / from all other target address spaces. If the target
   /// exposes no such address space / does not care, we return 0.
-  virtual unsigned getFlatPtrAddressSpace() const {
-return 0;
-  }
+  virtual unsigned getFlatPtrAddressSpace() const { return 0; }
 
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-17 Thread Alex Voicu via cfe-commits


@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.

AlexVlx wrote:

There's no such thing as LangAS::Generic, there's LangAS::Default. Default 
doesn't have to map to something that's actually useful, and it actually does 
not. There's no actual mechanism to get a flat pointer. And I have no idea what 
notion spreading you are talking about - this merely maintains the status quo, 
wherein everybody grabs an AS 0 pointer and hopes for the best. So I have no 
idea what you are talking about, if I'm honest.

https://github.com/llvm/llvm-project/pull/95728
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-16 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/95728

>From 2b500ad9ef2baf27da29146b5a4123dcb75e Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 02:15:00 +0100
Subject: [PATCH] Add interface for exposing a target's flat address space, if
 it exists.

---
 clang/include/clang/Basic/TargetInfo.h | 7 +++
 clang/lib/Basic/Targets/AMDGPU.h   | 6 ++
 clang/lib/Basic/Targets/SPIR.h | 4 
 3 files changed, 17 insertions(+)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8a6511b9ced83..8841ec5f910d9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.
+  virtual unsigned getFlatPtrAddressSpace() const {
+return 0;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 94d9ba93ed226..d06c7d58fe94c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -379,6 +379,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 return static_cast(llvm::AMDGPUAS::CONSTANT_ADDRESS);
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces.
+  unsigned getFlatPtrAddressSpace() const override {
+return static_cast(llvm::AMDGPUAS::FLAT_ADDRESS);
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac..14d235bace960 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -182,6 +182,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public 
TargetInfo {
 return TargetInfo::VoidPtrBuiltinVaList;
   }
 
+  unsigned getFlatPtrAddressSpace() const override {
+return 4u; // 4 is generic i.e. flat for SPIR & SPIR-V.
+  }
+
   std::optional
   getDWARFAddressSpace(unsigned AddressSpace) const override {
 return AddressSpace;

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-16 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx edited 
https://github.com/llvm/llvm-project/pull/95728
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CodeGen] Add query for a target's flat address space (PR #95728)

2024-06-16 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx created 
https://github.com/llvm/llvm-project/pull/95728

Often, targets which are not address space agnostic expose a flat/generic 
address space, which acts as a shared, legal target for address space casts. 
Whilst today we accidentally (e.g. by using `PointerType::getUnqual`) treat 0 
as corresponding to this flat address space, there is no binding requirement 
placed on targets in this regard, which leads to issues such as those reflected 
in #93601 and #93914. This patch adds a `getFlatPtrAddressSpace()` interface in 
`TargetInfo`, allowing targets to inform the front-end. A possible alternative 
name would be `getGenericPtrAddressSpace()`, but that was not chosen since 
generic has a fairly specific meaning in C++ and it seemed somewhat confusing 
in this context.

The interface is not used anywhere at the moment, but the intention is to 
employ it, for example, to specify the pointer type for the `llvm.used` array's 
elements.
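
As a hedged sketch of that intended use (picking the element pointer type for 
the `llvm.used` array from the flat address space), assuming a 
`CodeGenModule &CGM` is in scope; this is not code from the patch:

```
// Falls back to AS 0 when the target does not care, per the interface's
// documented default.
unsigned FlatAS = CGM.getTarget().getFlatPtrAddressSpace();
llvm::PointerType *UsedEltPtrTy =
    llvm::PointerType::get(CGM.getLLVMContext(), FlatAS);
```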

>From 2b500ad9ef2baf27da29146b5a4123dcb75e Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Mon, 17 Jun 2024 02:15:00 +0100
Subject: [PATCH] Add interface for exposing a target's flat address space, if
 it exists.

---
 clang/include/clang/Basic/TargetInfo.h | 7 +++
 clang/lib/Basic/Targets/AMDGPU.h   | 6 ++
 clang/lib/Basic/Targets/SPIR.h | 4 
 3 files changed, 17 insertions(+)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 8a6511b9ced83..8841ec5f910d9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
 return 0;
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces. If the target
+  /// exposes no such address space / does not care, we return 0.
+  virtual unsigned getFlatPtrAddressSpace() const {
+return 0;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 94d9ba93ed226..d06c7d58fe94c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -379,6 +379,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 return static_cast(llvm::AMDGPUAS::CONSTANT_ADDRESS);
   }
 
+  /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+  /// can be casted to / from all other target address spaces.
+  unsigned getFlatPtrAddressSpace() const override {
+return static_cast(llvm::AMDGPUAS::FLAT_ADDRESS);
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return 
the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac..14d235bace960 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -182,6 +182,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public 
TargetInfo {
 return TargetInfo::VoidPtrBuiltinVaList;
   }
 
+  unsigned getFlatPtrAddressSpace() const override {
+return 4u; // 4 is generic i.e. flat for SPIR & SPIR-V.
+  }
+
   std::optional
   getDWARFAddressSpace(unsigned AddressSpace) const override {
 return AddressSpace;

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-12 Thread Alex Voicu via cfe-commits


@@ -128,12 +128,13 @@ enum class CudaArch {
   GFX12_GENERIC,
   GFX1200,
   GFX1201,
+  AMDGCNSPIRV,
   Generic, // A processor model named 'generic' if the target backend defines a
// public one.
   LAST,
 
   CudaDefault = CudaArch::SM_52,
-  HIPDefault = CudaArch::GFX906,
+  HIPDefault = CudaArch::AMDGCNSPIRV,

AlexVlx wrote:

Done.

https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-10 Thread Alex Voicu via cfe-commits


@@ -128,12 +128,13 @@ enum class CudaArch {
   GFX12_GENERIC,
   GFX1200,
   GFX1201,
+  AMDGCNSPIRV,
   Generic, // A processor model named 'generic' if the target backend defines a
// public one.
   LAST,
 
   CudaDefault = CudaArch::SM_52,
-  HIPDefault = CudaArch::GFX906,
+  HIPDefault = CudaArch::AMDGCNSPIRV,

AlexVlx wrote:

In the long run it should not (the BE is almost baked, for example); in the 
interim it will need the translator, and I (think I) see what you are saying: 
it might be onerous to have HIPAMD depend on that. I can unflip the default 
pending the BE coming along; the only cost would be slightly more dubious 
ergonomics, but those are pre-existing.

https://github.com/llvm/llvm-project/pull/95061
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (PR #95061)

2024-06-10 Thread Alex Voicu via cfe-commits


@@ -128,12 +128,13 @@ enum class CudaArch {
   GFX12_GENERIC,
   GFX1200,
   GFX1201,
+  AMDGCNSPIRV,
   Generic, // A processor model named 'generic' if the target backend defines a
// public one.
   LAST,
 
   CudaDefault = CudaArch::SM_52,
-  HIPDefault = CudaArch::GFX906,
+  HIPDefault = CudaArch::AMDGCNSPIRV,

AlexVlx wrote:

Yes, the intent is for HIP to go to SPIR-V for this case, since that will 
maximise the "usability" of the compiled binary, for lack of a better word.

https://github.com/llvm/llvm-project/pull/95061


[clang] [clang][CodeGen] Make `UnqualPtrTy` truly unqualified (PR #94388)

2024-06-07 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> There are still quite a few references to UnqualPtrTy that you're not 
> changing, yet they are now wrong, no?
> 
> ```
> $ git grep --count '\<UnqualPtrTy\>' origin/main clang
> origin/main:clang/lib/CodeGen/CGAtomic.cpp:1
> origin/main:clang/lib/CodeGen/CGBlocks.cpp:1
> origin/main:clang/lib/CodeGen/CGBuiltin.cpp:13
> origin/main:clang/lib/CodeGen/CGCUDANV.cpp:1
> origin/main:clang/lib/CodeGen/CGExpr.cpp:2
> origin/main:clang/lib/CodeGen/CGObjCMac.cpp:1
> origin/main:clang/lib/CodeGen/CodeGenTypeCache.h:1
> origin/main:clang/lib/CodeGen/ItaniumCXXABI.cpp:10
> origin/main:clang/lib/CodeGen/Targets/PPC.cpp:1
> origin/main:clang/lib/CodeGen/Targets/Sparc.cpp:1
> ```

Right, I'm going through these at the moment; I got sidetracked. 

https://github.com/llvm/llvm-project/pull/94388


[clang] [clang][CodeGen] Make `UnqualPtrTy` truly unqualified (PR #94388)

2024-06-07 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

> And I still strongly urge renaming what this is, given it is _not_ the type 
> for an unqualified C `void *`, as one would normally expect given it's in 
> Clang. Perhaps DummyPtrTy?

DummyPtrTy seems mnemonic / on point, but it might trigger antibodies from the 
LLVM side :) Perhaps `NoAddrSpacePtrTy` or something to that effect? What makes 
this unqualified is that it is just a `ptr` and not a `ptr addrspace(n)`. On the 
other hand, since today it is an unconditional alias for `ptr addrspace(0)`, 
perhaps `AS0PtrTy` or some such could also work?
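
For readers following along, here is a small sketch of the distinction being discussed, i.e. the plain `ptr` produced by `PointerType::getUnqual` versus an explicitly qualified `ptr addrspace(n)`. It is illustrative only; it uses the LLVM C++ API directly rather than clang's CodeGen type cache:

```cpp
// Illustrative sketch: how the two pointer types print as LLVM IR types.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;
  // getUnqual yields the address-space-0 pointer type, printed as plain 'ptr'.
  llvm::PointerType *Unqual = llvm::PointerType::getUnqual(Ctx);
  // An explicitly qualified pointer type, printed as 'ptr addrspace(5)'.
  llvm::PointerType *AS5 = llvm::PointerType::get(Ctx, 5);

  Unqual->print(llvm::outs());
  llvm::outs() << "\n";
  AS5->print(llvm::outs());
  llvm::outs() << "\n";
  return 0;
}
```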

https://github.com/llvm/llvm-project/pull/94388


[clang] [llvm] [clang][SPIR-V] Add support for AMDGCN flavoured SPIRV (PR #89796)

2024-06-07 Thread Alex Voicu via cfe-commits

AlexVlx wrote:

Thank you everyone for the reviews!

https://github.com/llvm/llvm-project/pull/89796


[clang] [llvm] [clang][SPIR-V] Add support for AMDGCN flavoured SPIRV (PR #89796)

2024-06-07 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx closed 
https://github.com/llvm/llvm-project/pull/89796


[clang] [clang][CodeGen] Make `UnqualPtrTy` truly unqualified (PR #94388)

2024-06-06 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/94388

>From cdf95a804614950167d2d4df227d06a86a70d4bb Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Tue, 4 Jun 2024 19:52:28 +0100
Subject: [PATCH 1/2] Unqualified `ptr`s should be truly unqualified, and not
 AS0 ptrs.

---
 clang/lib/CodeGen/CodeGenModule.cpp  | 1 +
 clang/lib/CodeGen/CodeGenTypeCache.h | 4 +++-
 clang/lib/CodeGen/ItaniumCXXABI.cpp  | 6 +++---
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index be7bf0b72dc0c..b58ca03ce69da 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -368,6 +368,7 @@ CodeGenModule::CodeGenModule(ASTContext &C,
   IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth());
   IntPtrTy = llvm::IntegerType::get(LLVMContext,
 C.getTargetInfo().getMaxPointerWidth());
+  UnqualPtrTy = llvm::PointerType::getUnqual(LLVMContext);
   Int8PtrTy = llvm::PointerType::get(LLVMContext,
  C.getTargetAddressSpace(LangAS::Default));
   const llvm::DataLayout &DL = M.getDataLayout();
diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h
index e273ebe3b060f..3b659bc182aa4 100644
--- a/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -51,9 +51,11 @@ struct CodeGenTypeCache {
 llvm::IntegerType *PtrDiffTy;
   };
 
+  /// unqualified void*
+  llvm::PointerType *UnqualPtrTy;
+
   /// void*, void** in the target's default address space (often 0)
   union {
-llvm::PointerType *UnqualPtrTy;
 llvm::PointerType *VoidPtrTy;
 llvm::PointerType *Int8PtrTy;
 llvm::PointerType *VoidPtrPtrTy;
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 8427286dee887..8952e7ca072da 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -4656,14 +4656,14 @@ static void InitCatchParam(CodeGenFunction &CGF,
   auto catchRD = CatchType->getAsCXXRecordDecl();
   CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD);
 
-  llvm::Type *PtrTy = CGF.UnqualPtrTy; // addrspace 0 ok
+  llvm::Type *PtrTy = CGF.UnqualPtrTy; // unqualified is ok
 
   // Check for a copy expression.  If we don't have a copy expression,
   // that means a trivial copy is okay.
   const Expr *copyExpr = CatchParam.getInit();
   if (!copyExpr) {
 llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
-Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
+Address adjustedExn(CGF.Builder.CreatePointerCast(rawAdjustedExn, PtrTy),
 LLVMCatchTy, caughtExnAlignment);
 LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
 LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
@@ -4677,7 +4677,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
 CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn);
 
   // Cast that to the appropriate type.
-  Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
+  Address adjustedExn(CGF.Builder.CreatePointerCast(rawAdjustedExn, PtrTy),
   LLVMCatchTy, caughtExnAlignment);
 
   // The copy expression is defined in terms of an OpaqueValueExpr.

>From 5eef3a90a2dcfa08aa84bef3f40db17845911039 Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Thu, 6 Jun 2024 16:40:24 +0100
Subject: [PATCH 2/2] Pointer to default should work.

---
 clang/lib/CodeGen/ItaniumCXXABI.cpp | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 8952e7ca072da..0c4f0c71a41e5 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -4656,15 +4656,12 @@ static void InitCatchParam(CodeGenFunction &CGF,
   auto catchRD = CatchType->getAsCXXRecordDecl();
   CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD);
 
-  llvm::Type *PtrTy = CGF.UnqualPtrTy; // unqualified is ok
-
   // Check for a copy expression.  If we don't have a copy expression,
   // that means a trivial copy is okay.
   const Expr *copyExpr = CatchParam.getInit();
   if (!copyExpr) {
 llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
-Address adjustedExn(CGF.Builder.CreatePointerCast(rawAdjustedExn, PtrTy),
-LLVMCatchTy, caughtExnAlignment);
+Address adjustedExn(rawAdjustedExn, LLVMCatchTy, caughtExnAlignment);
 LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
 LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
 CGF.EmitAggregateCopy(Dest, Src, CatchType, AggValueSlot::DoesNotOverlap);
@@ -4676,9 +4673,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
   llvm::CallInst *rawAdjustedExn =
 CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn);
 
-  // Cast that t
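
To make the CodeGenTypeCache.h hunk above a bit more concrete: while `UnqualPtrTy` lives in the union, assigning `VoidPtrTy` also overwrites it, so the "unqualified" pointer type can never differ from the default-address-space one. Below is a tiny sketch of why pulling it out of the union matters; it is illustrative only, `FakePtrTy` and the struct names are made up and this is not clang code:

```cpp
// Illustrative sketch: a union member cannot hold a value of its own.
#include <cassert>

struct FakePtrTy { unsigned AddrSpace; };

struct CacheWithUnion {
  union {
    FakePtrTy *UnqualPtrTy; // aliases VoidPtrTy, exactly as the old cache did
    FakePtrTy *VoidPtrTy;
  };
};

struct CacheFixed {
  FakePtrTy *UnqualPtrTy; // now initialised independently of the default AS
  FakePtrTy *VoidPtrTy;
};

int main() {
  FakePtrTy DefaultAS{5}, Unqual{0};

  CacheWithUnion Old{};
  Old.VoidPtrTy = &DefaultAS;            // also overwrites UnqualPtrTy...
  assert(Old.UnqualPtrTy == &DefaultAS); // ...so "unqualified" was really AS 5

  CacheFixed New{};
  New.UnqualPtrTy = &Unqual;
  New.VoidPtrTy = &DefaultAS;
  assert(New.UnqualPtrTy != New.VoidPtrTy); // the two can legitimately differ
  return 0;
}
```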

[clang] [clang][CodeGen] Make `UnqualPtrTy` truly unqualified (PR #94388)

2024-06-06 Thread Alex Voicu via cfe-commits

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/94388

>From cdf95a804614950167d2d4df227d06a86a70d4bb Mon Sep 17 00:00:00 2001
From: Alex Voicu 
Date: Tue, 4 Jun 2024 19:52:28 +0100
Subject: [PATCH] Unqualified `ptr`s should be truly unqualified, and not AS0
 ptrs.

---
 clang/lib/CodeGen/CodeGenModule.cpp  | 1 +
 clang/lib/CodeGen/CodeGenTypeCache.h | 4 +++-
 clang/lib/CodeGen/ItaniumCXXABI.cpp  | 6 +++---
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index be7bf0b72dc0c..b58ca03ce69da 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -368,6 +368,7 @@ CodeGenModule::CodeGenModule(ASTContext &C,
   IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth());
   IntPtrTy = llvm::IntegerType::get(LLVMContext,
 C.getTargetInfo().getMaxPointerWidth());
+  UnqualPtrTy = llvm::PointerType::getUnqual(LLVMContext);
   Int8PtrTy = llvm::PointerType::get(LLVMContext,
  C.getTargetAddressSpace(LangAS::Default));
   const llvm::DataLayout &DL = M.getDataLayout();
diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h
index e273ebe3b060f..3b659bc182aa4 100644
--- a/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -51,9 +51,11 @@ struct CodeGenTypeCache {
 llvm::IntegerType *PtrDiffTy;
   };
 
+  /// unqualified void*
+  llvm::PointerType *UnqualPtrTy;
+
   /// void*, void** in the target's default address space (often 0)
   union {
-llvm::PointerType *UnqualPtrTy;
 llvm::PointerType *VoidPtrTy;
 llvm::PointerType *Int8PtrTy;
 llvm::PointerType *VoidPtrPtrTy;
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 8427286dee887..8952e7ca072da 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -4656,14 +4656,14 @@ static void InitCatchParam(CodeGenFunction &CGF,
   auto catchRD = CatchType->getAsCXXRecordDecl();
   CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD);
 
-  llvm::Type *PtrTy = CGF.UnqualPtrTy; // addrspace 0 ok
+  llvm::Type *PtrTy = CGF.UnqualPtrTy; // unqualified is ok
 
   // Check for a copy expression.  If we don't have a copy expression,
   // that means a trivial copy is okay.
   const Expr *copyExpr = CatchParam.getInit();
   if (!copyExpr) {
 llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
-Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
+Address adjustedExn(CGF.Builder.CreatePointerCast(rawAdjustedExn, PtrTy),
 LLVMCatchTy, caughtExnAlignment);
 LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
 LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
@@ -4677,7 +4677,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
 CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn);
 
   // Cast that to the appropriate type.
-  Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
+  Address adjustedExn(CGF.Builder.CreatePointerCast(rawAdjustedExn, PtrTy),
   LLVMCatchTy, caughtExnAlignment);
 
   // The copy expression is defined in terms of an OpaqueValueExpr.
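
On the `CreateBitCast` -> `CreatePointerCast` change in the ItaniumCXXABI.cpp hunks above: a bitcast cannot change address spaces, so once the destination may be the truly unqualified `ptr` while the exception pointer lives in another AS, the cast has to be allowed to lower to an `addrspacecast`, which is what `CreatePointerCast` does. Below is a small self-contained sketch; it is illustrative only, the function and its argument are made up and this is not the clang exception-handling code:

```cpp
// Illustrative sketch: CreatePointerCast emits an addrspacecast when the
// source and destination address spaces differ; CreateBitCast cannot do this.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);

  // A function taking a pointer in address space 1, standing in for the
  // "raw adjusted exception" value from the patch.
  Type *ArgTy = PointerType::get(Ctx, 1);
  FunctionType *FnTy =
      FunctionType::get(Type::getVoidTy(Ctx), {ArgTy}, /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // Cast the AS(1) pointer to the unqualified (AS 0) 'ptr'; this becomes an
  // addrspacecast instruction rather than a (disallowed) bitcast.
  Value *Adjusted =
      B.CreatePointerCast(F->getArg(0), PointerType::getUnqual(Ctx), "adjusted");
  (void)Adjusted;
  B.CreateRetVoid();

  verifyFunction(*F, &errs());
  M.print(outs(), nullptr); // body contains: addrspacecast ptr addrspace(1) ... to ptr
  return 0;
}
```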
