[PATCH] D155213: [HIP] Add `-fno-offload-uniform-block`

2023-07-27 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 544812.
yaxunl marked an inline comment as done.
yaxunl added a reviewer: Anastasia.
yaxunl added a comment.

revised by comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

Files:
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
  clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
  clang/test/Driver/hip-options.hip
  clang/test/Driver/opencl.cl

Index: clang/test/Driver/opencl.cl
===
--- clang/test/Driver/opencl.cl
+++ clang/test/Driver/opencl.cl
@@ -17,6 +17,8 @@
 // RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
 // RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s
 // RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
+// RUN: %clang -S -### -foffload-uniform-block %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
+// RUN: %clang -S -### -fno-offload-uniform-block -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
 // RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
 // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
 // RUN: %clang -S -### -target spir-unknown-unknown %s 2>&1 | FileCheck --check-prefix=CHECK-W-SPIR-COMPAT %s
@@ -44,7 +46,7 @@
 // CHECK-DENORMS-ARE-ZERO-NOT: "-cl-denorms-are-zero"
 
 // CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"
-// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size"
+// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-foffload-uniform-block"
 // CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'
 // CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'
 
Index: clang/test/Driver/hip-options.hip
===
--- clang/test/Driver/hip-options.hip
+++ clang/test/Driver/hip-options.hip
@@ -205,3 +205,27 @@
 
 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \
 // RUN:   -x c++ %s 2>&1 | count 0
+/ Check -fno-offload-uniform-block is passed to clang -cc1 but
+// (default) -fno-offload-uniform-block is not.
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOUNIBLK %s
+
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-offload-uniform-block"
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fno-offload-uniform-block"
+
+// RUN: %clang -### -nogpuinc -nogpulib -foffload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
+
+// UNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-foffload-uniform-block"
+// UNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-foffload-uniform-block"
+
+// RUN: %clang -### -nogpuinc -nogpulib \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFUNIBLK %s
+
+// DEFUNIBLK-NOT: "-f{{(no-)?}}offload-uniform-block"
+
+// Check no warnings for -f[no-]offload-uniform-block.
+
+// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   -foffload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 | count 0
Index: clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
===
--- clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
+++ clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
 
 kernel void ker() {};
 // CHECK: define{{.*}}@ker() #0
Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
===
--- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -10,10 +10,18 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
 // RUN: -verify -o - -x hip %s | FileCheck -check-prefix=NAMD %s
 
+// RUN: %clang_cc1 -triple 

[PATCH] D155213: [HIP] Add `-fno-offload-uniform-block`

2023-07-27 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked an inline comment as done.
yaxunl added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:2391
 
 if (TargetDecl->hasAttr()) {
   if (getLangOpts().OpenCLVersion <= 120) {

scchan wrote:
> The block here needs to be aware of this new flag.
Now that `-foffload-uniform-block` has the same default value as 
`-cl-uniform-work-group-size` for OpenCL. we can make 
`-cl-uniform-work-group-size` an alias to `-foffload-uniform-block`. will update


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155213: [HIP] Add `-fno-offload-uniform-block`

2023-07-27 Thread Siu Chi Chan via Phabricator via cfe-commits
scchan added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:2391
 
 if (TargetDecl->hasAttr()) {
   if (getLangOpts().OpenCLVersion <= 120) {

The block here needs to be aware of this new flag.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155213: [HIP] Add `-fno-offload-uniform-block`

2023-07-26 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 544491.
yaxunl edited the summary of this revision.
yaxunl added a comment.

make the option generic for offloading languages


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
  clang/test/Driver/hip-options.hip

Index: clang/test/Driver/hip-options.hip
===
--- clang/test/Driver/hip-options.hip
+++ clang/test/Driver/hip-options.hip
@@ -205,3 +205,27 @@
 
 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \
 // RUN:   -x c++ %s 2>&1 | count 0
+/ Check -fno-offload-uniform-block is passed to clang -cc1 but
+// (default) -fno-offload-uniform-block is not.
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOUNIBLK %s
+
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-offload-uniform-block"
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fno-offload-uniform-block"
+
+// RUN: %clang -### -nogpuinc -nogpulib -foffload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
+
+// UNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-foffload-uniform-block"
+// UNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-foffload-uniform-block"
+
+// RUN: %clang -### -nogpuinc -nogpulib \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFUNIBLK %s
+
+// DEFUNIBLK-NOT: "-f{{(no-)?}}offload-uniform-block"
+
+// Check no warnings for -f[no-]offload-uniform-block.
+
+// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   -foffload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 | count 0
Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
===
--- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -10,10 +10,18 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
 // RUN: -verify -o - -x hip %s | FileCheck -check-prefix=NAMD %s
 
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -foffload-uniform-block \
+// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN: | FileCheck -check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fno-offload-uniform-block \
+// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN: | FileCheck -check-prefixes=NOUB %s
+
 #include "Inputs/cuda.h"
 
 __global__ void flat_work_group_size_default() {
 // CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
+// NOUB: define{{.*}} void @_Z28flat_work_group_size_defaultv() [[NOUB:#[0-9]+]]
 }
 
 __attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
@@ -45,3 +53,5 @@
 // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
 // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
 // CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"
+
+// NOUB-NOT: "uniform-work-group-size"="true"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -7264,6 +7264,9 @@
 Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);
   }
 
+  Args.AddLastArg(CmdArgs, options::OPT_foffload_uniform_block,
+  options::OPT_fno_offload_uniform_block);
+
   if (IsCudaDevice || IsHIPDevice) {
 StringRef InlineThresh =
 Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);
Index: clang/lib/CodeGen/Targets/AMDGPU.cpp
===
--- clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -401,13 +401,6 @@
   if (FD)
 setFunctionDeclAttributes(FD, F, M);
 
-  const bool IsHIPKernel =
-  M.getLangOpts().HIP && FD && FD->hasAttr();
-
-  // TODO: This should be moved to language specific attributes instead.
-  if (IsHIPKernel)
-F->addFnAttr("uniform-work-group-size", "true");
-
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
 F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
 
Index: clang/lib/CodeGen/CGCall.cpp
===
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -2402,6 +2402,10 @@
  

[PATCH] D155213: [HIP] Add `-fno-offload-uniform-block`

2023-07-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

This should be named -foffload*, it should not use HIP in the description, and 
it should apply to OpenMP as well.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155213: [HIP] Add `-fno-offload-uniform-block`

2023-07-26 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

ping

I renamed the option as `-fno-offload-uniform-block`. I switched to `offload` 
instead of `gpu` because I think in the long run offloading toolchains are not 
limited to GPUs, therefore sooner or later we will feel `-fgpu-` options are 
awkward. I did not use `--no-offload-uniform-block` because Options.td does not 
allow marshalling `--` prefixed options. It needs considerable change to some 
basic multiclass to achieve that and that would break quite a few established 
conventions. Therefore I feel it is better to follow the convention.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155213: [HIP] Add `-fno-offload-uniform-block`

2023-07-26 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 544345.
yaxunl retitled this revision from "[HIP] Add `-fno-hip-uniform-block`" to 
"[HIP] Add `-fno-offload-uniform-block`".
yaxunl edited the summary of this revision.
yaxunl added a comment.

rename the option


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenHIP/default-attributes.hip
  clang/test/Driver/hip-options.hip

Index: clang/test/Driver/hip-options.hip
===
--- clang/test/Driver/hip-options.hip
+++ clang/test/Driver/hip-options.hip
@@ -205,3 +205,24 @@
 
 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \
 // RUN:   -x c++ %s 2>&1 | count 0
+/ Check -fno-offload-uniform-block is passed to clang -cc1 but
+// (default) -fno-offload-uniform-block is not.
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOUNIBLK %s
+
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-offload-uniform-block"
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fno-offload-uniform-block"
+
+// RUN: %clang -### -nogpuinc -nogpulib -foffload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
+
+// RUN: %clang -### -nogpuinc -nogpulib \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
+
+// UNIBLK-NOT: "--{{(no-)?}}offload-uniform-block"
+
+// Check no warnings for --[no-]offload-uniform-block.
+
+// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   -fno-offload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 | count 0
Index: clang/test/CodeGenHIP/default-attributes.hip
===
--- clang/test/CodeGenHIP/default-attributes.hip
+++ clang/test/CodeGenHIP/default-attributes.hip
@@ -5,6 +5,9 @@
 // RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
 // RUN:-emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
 
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device -fno-offload-uniform-block \
+// RUN:-emit-llvm -o - %s | FileCheck -check-prefix=NOUNIBLK %s
+
 #define __device__ __attribute__((device))
 #define __global__ __attribute__((global))
 
@@ -20,6 +23,12 @@
 // OPT-NEXT:  entry:
 // OPT-NEXT:ret void
 //
+// NOUNIBLK: Function Attrs: convergent mustprogress noinline nounwind optnone
+// NOUNIBLK-LABEL: define {{[^@]+}}@_Z4funcv
+// NOUNIBLK-SAME: () #[[ATTR0:[0-9]+]] {
+// NOUNIBLK-NEXT:  entry:
+// NOUNIBLK-NEXT:ret void
+//
 __device__ void func() {
 
 }
@@ -36,21 +45,34 @@
 // OPT-NEXT:  entry:
 // OPT-NEXT:ret void
 //
+// NOUNIBLK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// NOUNIBLK-LABEL: define {{[^@]+}}@_Z6kernelv
+// NOUNIBLK-SAME: () #[[ATTR1:[0-9]+]] {
+// NOUNIBLK-NEXT:  entry:
+// NOUNIBLK-NEXT:ret void
+//
 __global__ void kernel() {
 
 }
 //.
-// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPTNONE: attributes #[[ATTR1]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+//.
+// OPT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPT: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+//.
+// NOUNIBLK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// NOUNIBLK: attributes #[[ATTR1]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 //.
-// OPT: attributes #0 = { mustprogress nofree