[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-03-24 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG29a5c3c8fe30: [NVPTX] Introduce attribute to mark kernels 
without a language mode (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D140226?vs=508170&id=508197#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

Files:
  clang/include/clang/Basic/Attr.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGen/nvptx_attributes.c
  clang/test/Misc/pragma-attribute-supported-attributes-list.test


Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -94,6 +94,7 @@
 // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method)
 // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum)
+// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function)
 // CHECK-NEXT: Naked (SubjectMatchRule_function)
 // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
 // CHECK-NEXT: NoCommon (SubjectMatchRule_variable)
Index: clang/test/CodeGen/nvptx_attributes.c
===
--- /dev/null
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:store i32 1, ptr [[TMP0]], align 4
+// CHECK-NEXT:ret void
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+  *ret = 1;
+}
+
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4970,7 +4970,10 @@
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
 S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
 
-  D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else
+D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
   // In host compilation the kernel is emitted as a stub function, which is
   // a helper function for launching the kernel. The instructions in the helper
   // function has nothing to do with the source code of the kernel. Do not emit
@@ -8851,6 +8854,7 @@
   case ParsedAttr::AT_CalledOnce:
 handleCalledOnceAttr(S, D, AL);
 break;
+  case ParsedAttr::AT_NVPTXKernel:
   case ParsedAttr::AT_CUDAGlobal:
 handleGlobalAttr(S, D, AL);
 break;
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7373,6 +7373,11 @@
   }
 }
   }
+
+  // Attach kernel metadata directly if compiling for NVPTX.
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+addNVVMMetadata(F, "kernel", 1);
+  }
 }
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
Index: clang/include/clang/Basic/Attr.td
===
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -414,6 +414,7 @@
 def TargetX86 : TargetArch<["x86"]>;
 def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
 def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
 def TargetWindows : TargetSpec {
   let OSes = ["Win32"];
 }
@@ -1221,6 +1222,12 @@
 }
 def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
 
+def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [Clang<"nvptx_kernel">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [Undocumented];
+}
+
 def HIPManaged : InheritableAttr {
   let Spellings = [GNU<"managed">, Declspec<"__managed__">];
   let Subjects = SubjectList<[Var]>;


Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ 

[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-03-24 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 508170.
jhuber6 added a comment.

Updating to simply add an entirely new attribute again. The existing
`CUDAGlobal` attribute does what we want, but it's also highly coupled with the
CUDA language. This made it pretty much impossible to find a way to re-use it
without breaking existing functionality. The amount of code duplicated is
minimal and this is required to be able to emit a callable kernel targeting
NVPTX directly. I'd like to use this for my ongoing GPU `libc` project so I'd
appreciate someone looking at this again.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

Files:
  clang/include/clang/Basic/Attr.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGen/nvptx_attributes.c
  clang/test/Misc/pragma-attribute-supported-attributes-list.test

Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -94,6 +94,7 @@
 // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method)
 // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum)
+// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function)
 // CHECK-NEXT: Naked (SubjectMatchRule_function)
 // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
 // CHECK-NEXT: NoCommon (SubjectMatchRule_variable)
Index: clang/test/CodeGen/nvptx_attributes.c
===
--- /dev/null
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,23 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:store i32 1, ptr [[TMP0]], align 4
+// CHECK-NEXT:ret void
+//
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+  *ret = 1;
+}
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
+//.
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
+// CHECK: !1 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !2 = !{!"clang version 17.0.0"}
+//.
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4970,7 +4970,10 @@
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
 S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
 
-  D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else
+D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
   // In host compilation the kernel is emitted as a stub function, which is
   // a helper function for launching the kernel. The instructions in the helper
   // function has nothing to do with the source code of the kernel. Do not emit
@@ -8851,6 +8854,7 @@
   case ParsedAttr::AT_CalledOnce:
 handleCalledOnceAttr(S, D, AL);
 break;
+  case ParsedAttr::AT_NVPTXKernel:
   case ParsedAttr::AT_CUDAGlobal:
 handleGlobalAttr(S, D, AL);
 break;
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7373,6 +7373,11 @@
   }
 }
   }
+
+  // Attach kernel metadata directly if compiling for NVPTX.
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+addNVVMMetadata(F, "kernel", 1);
+  }
 }
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
Index: clang/include/clang/Basic/Attr.td
===
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -414,6 +414,7 @@
 def TargetX86 : TargetArch<["x86"]>;
 def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
 def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
 def TargetWindows : TargetSpec {
   let OSes = ["Win32"];
 }
@@ -1221,6 +1222,12 @@
 }
 def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
 
+def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let 

[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-03-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

@tra would it be possible to go to the earlier version that simply duplicated a 
slight amount of logic to introduce the new and separate attribute 
`nvptx_kernel`? Overloading CUDA's `device` attribute is problematic because 
it's used and checked in several different contexts. I'd like to be able to 
simplify this code 
https://github.com/llvm/llvm-project/blob/main/libc/startup/gpu/nvptx/start.cpp.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-01-13 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D140226#4052105 , @aaron.ballman 
wrote:

> Precommit CI found failures that look relevant to the patch.
>
>> We may want to be able to mark certain regions as kernels even without being 
>> in an accepted CUDA or OpenCL language mode.
>
> Can you explain this a bit more? Under what circumstances would you want to 
> do this?

Yeah, I need to work on this some more. A previous version worked fine but it 
duplicated some logic, I'm not sure if there's a good way to re-use the 
existing kernel logic without breaking some of the assumptions. The desire was 
to be able to emit a kernel that can be called externally via 
cross-compilation. E.g. `clang foo.c --target=nvptx64-nvidia-cuda`. The 
intended use-case was for testing experimental `libc` implementations using 
integration tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-01-13 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a reviewer: erichkeane.
aaron.ballman added a comment.

Precommit CI found failures that look relevant to the patch.

> We may want to be able to mark certain regions as kernels even without being 
> in an accepted CUDA or OpenCL language mode.

Can you explain this a bit more? Under what circumstances would you want to do 
this?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-19 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/include/clang/Basic/Attr.td:1198
 
-def CUDAGlobal : InheritableAttr {
-  let Spellings = [GNU<"global">, Declspec<"__global__">];
+def CUDAGlobal : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [GNU<"global">, Declspec<"__global__">, 
Clang<"nvptx_kernel">];

shangwuyao wrote:
> jhuber6 wrote:
> > tra wrote:
> > > Nice.
> > > 
> > > This reminded me that we have a project compiling CUDA, but targeting 
> > > SPIR-V instead of NVPTX. It looks like this will likely break them. The 
> > > project is out-of-tree, but I'd still need to figure out how to keep them 
> > > working.  I guess it would be easy enough to expand TargetNVPTX to 
> > > TargetNVPTXOrSpirV. I'm mostly concerned about logistics of making it 
> > > happen without disruption.
> > > 
> > > 
> > This might've broken more stuff after looking into it, I forgot that 
> > `AMDGPU` still uses the same CUDA attributes, and the host portion of CUDA 
> > also checks these. It would be nice if there was a way to say "CUDA" or 
> > "NVPTX", wondering if that's possible in the tablegen here.
> What's the plan here for keeping the SPIR-V and AMDGPU working? Would it work 
> if we simply get rid of the `TargetSpecificAttr`?
Yeah, it would I'll need to update the patch. The best solution would be if 
there were a way to say "TargetNVPTX or LangOpts.CUDA". Not sure if that's 
possible in Tablegen. The previous diff I had worked fine, but we should 
definitely try to avoid rework.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-19 Thread Shangwu Yao via Phabricator via cfe-commits
shangwuyao added inline comments.



Comment at: clang/include/clang/Basic/Attr.td:1198
 
-def CUDAGlobal : InheritableAttr {
-  let Spellings = [GNU<"global">, Declspec<"__global__">];
+def CUDAGlobal : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [GNU<"global">, Declspec<"__global__">, 
Clang<"nvptx_kernel">];

jhuber6 wrote:
> tra wrote:
> > Nice.
> > 
> > This reminded me that we have a project compiling CUDA, but targeting 
> > SPIR-V instead of NVPTX. It looks like this will likely break them. The 
> > project is out-of-tree, but I'd still need to figure out how to keep them 
> > working.  I guess it would be easy enough to expand TargetNVPTX to 
> > TargetNVPTXOrSpirV. I'm mostly concerned about logistics of making it 
> > happen without disruption.
> > 
> > 
> This might've broken more stuff after looking into it, I forgot that `AMDGPU` 
> still uses the same CUDA attributes, and the host portion of CUDA also checks 
> these. It would be nice if there was a way to say "CUDA" or "NVPTX", 
> wondering if that's possible in the tablegen here.
What's the plan here for keeping the SPIR-V and AMDGPU working? Would it work 
if we simply get rid of the `TargetSpecificAttr`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-18 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

Calling convention is the right model here. Kernels are functions with a 
different calling convention to the 'normal' functions in a very literal sense. 
The calling convention modelling in clang is different to attribute handling 
and changing nvptx to it is probably invasive, though it seems to me it could 
be done incrementally.

I wouldn't suggest adding a nvptx_kernel calling convention to clang though, 
rather we could repurpose the amdgpu one to be gpu_kernel. Possibly spelled 
nvptx_kernel for the user but represented within clang as gpu_kernel.

Related, I think there's a spirv or opencl kernel representation in llvm for 
amdgpu, I would be interested in collapsing those and the openmp or hip 
annotation to a single thing if possible.

That's all medium term cleanup ideas, current patch looks good to me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-18 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D140226#4003826 , @tschuett wrote:

> But then you are maybe mixing two concepts. `kernel` is source code or AST 
> feature. nvptx or AMDGPU are command line flags.
>
> CUDA, Sycl, nvtx, and AMDGPU are modes or calling conventions?

The way I understand it, the architecture determines the actual ISA for the 
code and the `kernel` metadata operates like a calling convention for whatever 
"OS" will be executing it. For example, for the triple `amdgcn-amd-amdhsa` we 
generate code for the `amdgcn` architecture and emit kernels such that the 
`hsa` runtime can call them. Similarly, for `nvptx64-nvidia-cuda` we emit code 
for `nvptx64` and our kernels use the calling convention such that the `cuda` 
runtime can call them. I think the main question of this patch is if we can 
separate the `cuda` runtime from the CUDA language. That is, we don't need to 
be using the CUDA language to emit functions that the `cuda` runtime can call. 
So this is more or less thinking of these kernel calls as a calling convention 
for a runtime or operating system rather than as a language feature.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-18 Thread Thorsten via Phabricator via cfe-commits
tschuett added a comment.

In D140226#4003794 , @jhuber6 wrote:

> In D140226#4003788 , @tschuett 
> wrote:
>
>> There are already SYCL specific attributes: https://reviews.llvm.org/D60455
>
> We could potentially merge these all into some generic attribute since they 
> all do the same thing on a conceptual level. The unique thing about the 
> existing `amdgpu_kernel` and corresponding `nvptx_kernel` is that they don't 
> rely on the language options like `SYCL` or `CUDA`. Though, semantically 
> those are definitely involved because the kernel itself is only meaningful to 
> whatever runtime is going to load it (e.g. CUDA or HSA) but we can probably 
> consider that separately to the compilation itself and just think of these as 
> calling conventions.

But then you are maybe mixing two concepts. `kernel` is source code or AST 
feature. nvptx or AMDGPU are command line flags.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-18 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D140226#4003788 , @tschuett wrote:

> There are already SYCL specific attributes: https://reviews.llvm.org/D60455

We could potentially merge these all into some generic attribute since they all 
do the same thing on a conceptual level. The unique thing about the existing 
`amdgpu_kernel` and corresponding `nvptx_kernel` is that they don't rely on the 
language options like `SYCL` or `CUDA`. Though, semantically those are 
definitely involved because the kernel itself is only meaningful to whatever 
runtime is going to load it (e.g. CUDA or HSA) but we can probably consider 
that separately to the compilation itself and just think of these as calling 
conventions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-18 Thread Thorsten via Phabricator via cfe-commits
tschuett added a comment.

There are already SYCL specific attributes: https://reviews.llvm.org/D60455


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-18 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D140226#4003781 , @keryell wrote:

> I wonder whether we could not factorize some code/attribute/logic with AMDGPU 
> or SYCL.
> Is the use case to have for example CUDA+HIP+SYCL in the same TU and thus 
> there is a need for different attributes

It would probably be good to have the high level concept of a "kernel" be 
factored out since this is common between all the offloading languages. The 
actual implementation it gets lowered to would still need to be distinct since 
this usually gets turned into some magic bits stashed in the executable for the 
runtime to read. The use-case for this patch is simply to allow people to 
compile pure C/C++ code to the NVPTX architecture, but still be able to mark 
the necessary metadata for kernels and globals.

I've recently thought if we could just apply the same logic used for shared 
objects with GPU images, that is globals without `hidden` visibility would be 
considered `__global__` and ones with `hidden` visibility would be considered 
`__device__` in CUDA terms. I think the only thing preventing us from thinking 
of a kernel call as a dynamic symbol load is probably the launch parameters. 
But this is purely theoretical, I don't think we need to worry about moving 
away from offloading languages or anything.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-18 Thread Ronan Keryell via Phabricator via cfe-commits
keryell added subscribers: bader, keryell.
keryell added a comment.

I wonder whether we could not factorize some code/attribute/logic with AMDGPU 
or SYCL.
Is the use case to have for example CUDA+HIP+SYCL in the same TU and thus there 
is a need for different attributes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/include/clang/Basic/Attr.td:1198
 
-def CUDAGlobal : InheritableAttr {
-  let Spellings = [GNU<"global">, Declspec<"__global__">];
+def CUDAGlobal : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [GNU<"global">, Declspec<"__global__">, 
Clang<"nvptx_kernel">];

tra wrote:
> Nice.
> 
> This reminded me that we have a project compiling CUDA, but targeting SPIR-V 
> instead of NVPTX. It looks like this will likely break them. The project is 
> out-of-tree, but I'd still need to figure out how to keep them working.  I 
> guess it would be easy enough to expand TargetNVPTX to TargetNVPTXOrSpirV. 
> I'm mostly concerned about logistics of making it happen without disruption.
> 
> 
This might've broken more stuff after looking into it, I forgot that `AMDGPU` 
still uses the same CUDA attributes, and the host portion of CUDA also checks 
these. It would be nice if there was a way to say "CUDA" or "NVPTX", wondering 
if that's possible in the tablegen here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-16 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

LGTM.

General question -- what happens now that the `global` and `launch_bounds` are 
target-specific as opposed to language-specific, if they happen to be used in a 
C++ compilation targeting `x86`? I assume they will still be ignored, right?




Comment at: clang/include/clang/Basic/Attr.td:1198
 
-def CUDAGlobal : InheritableAttr {
-  let Spellings = [GNU<"global">, Declspec<"__global__">];
+def CUDAGlobal : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [GNU<"global">, Declspec<"__global__">, 
Clang<"nvptx_kernel">];

Nice.

This reminded me that we have a project compiling CUDA, but targeting SPIR-V 
instead of NVPTX. It looks like this will likely break them. The project is 
out-of-tree, but I'd still need to figure out how to keep them working.  I 
guess it would be easy enough to expand TargetNVPTX to TargetNVPTXOrSpirV. I'm 
mostly concerned about logistics of making it happen without disruption.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 483640.
jhuber6 added a comment.

Changing to use the same CUDA global attributes. This requires a few extra 
checks for whether or not we were in CUDA mode since previously it just assume 
any time we saw one of these globals we were in that mode. I added a different 
spelling as well just for consistency.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

Files:
  clang/include/clang/Basic/Attr.td
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/nvptx_attributes.c

Index: clang/test/CodeGen/nvptx_attributes.c
===
--- /dev/null
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,51 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@device
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:ret i32 1
+//
+int device() {return 1;};
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:[[CALL:%.*]] = call i32 @device()
+// CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:store i32 [[CALL]], ptr [[TMP0]], align 4
+// CHECK-NEXT:ret void
+//
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+  *ret = device();
+}
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@bar
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:[[CALL:%.*]] = call i32 @device()
+// CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:store i32 [[CALL]], ptr [[TMP0]], align 4
+// CHECK-NEXT:ret void
+//
+__attribute__((nvptx_kernel, nvptx_launch_bounds(1, 128))) void bar(int *ret) {
+  *ret = device();
+}
+
+
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
+//.
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
+// CHECK: !1 = !{ptr @bar, !"kernel", i32 1}
+// CHECK: !2 = !{ptr @bar, !"maxntidx", i32 1}
+// CHECK: !3 = !{ptr @bar, !"minctasm", i32 128}
+// CHECK: !4 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !5 = !{!"clang version 16.0.0"}
+//.
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7328,32 +7328,29 @@
 }
   }
 
-  // Perform special handling in CUDA mode.
-  if (M.getLangOpts().CUDA) {
-// CUDA __global__ functions get a kernel metadata entry.  Since
-// __global__ functions cannot be called from the device, we do not
-// need to set the noinline attribute.
-if (FD->hasAttr<CUDAGlobalAttr>()) {
-  // Create !{, metadata !"kernel", i32 1} node
-  addNVVMMetadata(F, "kernel", 1);
-}
-if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
-  // Create !{, metadata !"maxntidx", i32 } node
-  llvm::APSInt MaxThreads(32);
-  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
-  if (MaxThreads > 0)
-addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
-
-  // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
-  // not specified in __launch_bounds__ or if the user specified a 0 value,
-  // we don't have to add a PTX directive.
-  if (Attr->getMinBlocks()) {
-llvm::APSInt MinBlocks(32);
-MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
-if (MinBlocks > 0)
-  // Create !{, metadata !"minctasm", i32 } node
-  addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
-  }
+  // CUDA __global__ functions get a kernel metadata entry.  Since
+  // __global__ functions cannot be called from the device, we do not
+  // need to set the noinline attribute.
+  if (FD->hasAttr<NVPTXKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
+// Create !{, metadata !"kernel", i32 1} node
+addNVVMMetadata(F, "kernel", 1);
+  }
+  if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
+// Create !{, metadata !"maxntidx", i32 } node
+llvm::APSInt MaxThreads(32);
+MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
+

[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-16 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:7362
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+addNVVMMetadata(F, "kernel", 1);
+  }

jhuber6 wrote:
> tra wrote:
> > How does AMDGPU track kernels? It may be a good opportunity to stop using 
> > metadata for this if we can use a better suited mechanism. E.g. a function 
> > attribute or a calling convention.
> > 
> > 
> AMDGPU uses a calling convention, which is probably a better option. I don't 
> know how this still gets reduced in the back-end though.
OK. Switching from metadata to a new calling convention would be nice, but it 
is likely a bit more complicated and can be handled separately if/when we 
decide to do it. It's not needed for your purposes. 




Comment at: clang/lib/Sema/SemaDeclAttr.cpp:4872
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else

jhuber6 wrote:
> tra wrote:
> > I'm tempted to `addAttr(CUDAGlobal)` here, effectively making 
> > `nvptx_kernel` a target-specific alias for it, so we're guaranteed that 
> > they both are handled exactly the same everywhere. 
> > On the other hand, it all may be moot -- without CUDA compilation mode, 
> > `CUDAGlobal` handling will be different in this compilation mode.
> > 
> > Can CUDAGlobal itself be allowed to be used as a target-specific attribute 
> > for NVPTX in C++ mode?
> > 
> > I think, if possible, we should ideally have only one attribute doing the 
> > job, even if it may have somewhat different use cases in CUDA vs C++ 
> > compilation modes.
> > 
> > 
> Yeah that's what I was thinking. Right now we only parse and check all the 
> CUDA attributes in the CUDA language mode. I could change it to allow them 
> whenever we're compiling for the `NVPTX` architecture instead. I don't think 
> for the vast majority it would have any significant effect.
Let's give it a try.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:7362
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+addNVVMMetadata(F, "kernel", 1);
+  }

tra wrote:
> How does AMDGPU track kernels? It may be a good opportunity to stop using 
> metadata for this if we can use a better suited mechanism. E.g. a function 
> attribute or a calling convention.
> 
> 
AMDGPU uses a calling convention, which is probably a better option. I don't 
know how this still gets reduced in the back-end though.



Comment at: clang/lib/Sema/SemaDeclAttr.cpp:4872
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else

tra wrote:
> I'm tempted to `addAttr(CUDAGlobal)` here, effectively making `nvptx_kernel` 
> a target-specific alias for it, so we're guaranteed that they both are 
> handled exactly the same everywhere. 
> On the other hand, it all may be moot -- without CUDA compilation mode, 
> `CUDAGlobal` handling will be different in this compilation mode.
> 
> Can CUDAGlobal itself be allowed to be used as a target-specific attribute 
> for NVPTX in C++ mode?
> 
> I think, if possible, we should ideally have only one attribute doing the 
> job, even if it may have somewhat different use cases in CUDA vs C++ 
> compilation modes.
> 
> 
Yeah that's what I was thinking. Right now we only parse and check all the CUDA 
attributes in the CUDA language mode. I could change it to allow them whenever 
we're compiling for the `NVPTX` architecture instead. I don't think for the 
vast majority it would have any significant effect.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-16 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:7362
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+addNVVMMetadata(F, "kernel", 1);
+  }

How does AMDGPU track kernels? It may be a good opportunity to stop using 
metadata for this if we can use a better suited mechanism. E.g. a function 
attribute or a calling convention.





Comment at: clang/lib/Sema/SemaDeclAttr.cpp:4872
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else

I'm tempted to `addAttr(CUDAGlobal)` here, effectively making `nvptx_kernel` a 
target-specific alias for it, so we're guaranteed that they both are handled 
exactly the same everywhere. 
On the other hand, it all may be moot -- without CUDA compilation mode, 
`CUDAGlobal` handling will be different in this compilation mode.

Can CUDAGlobal itself be allowed to be used as a target-specific attribute for 
NVPTX in C++ mode?

I think, if possible, we should ideally have only one attribute doing the job, 
even if it may have somewhat different use cases in CUDA vs C++ compilation 
modes.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2022-12-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: tra, JonChesterfield.
Herald added subscribers: kosarev, mattd, gchakrabarti, asavonic, jdoerfert, 
Anastasia, tpr.
Herald added a reviewer: aaron.ballman.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

We may want to be able to mark certain regions as kernels even without
being in an accepted CUDA or OpenCL language mode. This patch introduces
a new attribute limited to `nvptx` targets called `nvptx_kernel` which
will perform the same metadata action as the existing CUDA ones. This
closely mimics the behaviour of the `amdgpu_kernel` attribute. This
allows for making executable NVPTX device images without using an
existing offloading language model.

I was unsure how to do this, I could potentially re-use all the CUDA
attributes and just replace the `CUDA` language requirement with an
`NVPTX` architecture requirement. Also I don't know if I should add more
than just this attribute.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140226

Files:
  clang/include/clang/Basic/Attr.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGen/nvptx_attributes.c


Index: clang/test/CodeGen/nvptx_attributes.c
===
--- /dev/null
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,22 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:store i32 1, ptr [[TMP0]], align 4
+// CHECK-NEXT:ret void
+//
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+  *ret = 1;
+}
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
+//.
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
+// CHECK: !1 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !2 = !{!"clang version 16.0.0"}
+//.
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4868,7 +4868,10 @@
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
 S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
 
-  D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else
+D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
   // In host compilation the kernel is emitted as a stub function, which is
   // a helper function for launching the kernel. The instructions in the helper
   // function has nothing to do with the source code of the kernel. Do not emit
@@ -8744,6 +8747,7 @@
   case ParsedAttr::AT_CalledOnce:
 handleCalledOnceAttr(S, D, AL);
 break;
+  case ParsedAttr::AT_NVPTXKernel:
   case ParsedAttr::AT_CUDAGlobal:
 handleGlobalAttr(S, D, AL);
 break;
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7356,6 +7356,11 @@
   }
 }
   }
+
+  // Attach kernel metadata directly if compiling for NVPTX.
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+addNVVMMetadata(F, "kernel", 1);
+  }
 }
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
Index: clang/include/clang/Basic/Attr.td
===
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -408,6 +408,7 @@
 def TargetX86 : TargetArch<["x86"]>;
 def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
 def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
 def TargetWindows : TargetSpec {
   let OSes = ["Win32"];
 }
@@ -1211,6 +1212,12 @@
 }
 def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
 
+def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [Clang<"nvptx_kernel">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [Undocumented];
+}
+
 def HIPManaged : InheritableAttr {
   let Spellings = [GNU<"managed">, Declspec<"__managed__">];
   let Subjects = SubjectList<[Var]>;


Index: