yaxunl updated this revision to Diff 544345.
yaxunl retitled this revision from "[HIP] Add `-fno-hip-uniform-block`" to 
"[HIP] Add `-fno-offload-uniform-block`".
yaxunl edited the summary of this revision.
yaxunl added a comment.

rename the option


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155213/new/

https://reviews.llvm.org/D155213

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenHIP/default-attributes.hip
  clang/test/Driver/hip-options.hip

Index: clang/test/Driver/hip-options.hip
===================================================================
--- clang/test/Driver/hip-options.hip
+++ clang/test/Driver/hip-options.hip
@@ -205,3 +205,24 @@
 
 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \
 // RUN:   -x c++ %s 2>&1 | count 0
+/ Check -fno-offload-uniform-block is passed to clang -cc1 but
+// (default) -fno-offload-uniform-block is not.
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOUNIBLK %s
+
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-offload-uniform-block"
+// NOUNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fno-offload-uniform-block"
+
+// RUN: %clang -### -nogpuinc -nogpulib -foffload-uniform-block \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
+
+// RUN: %clang -### -nogpuinc -nogpulib \
+// RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
+
+// UNIBLK-NOT: "--{{(no-)?}}offload-uniform-block"
+
+// Check no warnings for --[no-]offload-uniform-block.
+
+// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
+// RUN:   -fno-offload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 | count 0
Index: clang/test/CodeGenHIP/default-attributes.hip
===================================================================
--- clang/test/CodeGenHIP/default-attributes.hip
+++ clang/test/CodeGenHIP/default-attributes.hip
@@ -5,6 +5,9 @@
 // RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
 // RUN:    -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
 
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device -fno-offload-uniform-block \
+// RUN:    -emit-llvm -o - %s | FileCheck -check-prefix=NOUNIBLK %s
+
 #define __device__ __attribute__((device))
 #define __global__ __attribute__((global))
 
@@ -20,6 +23,12 @@
 // OPT-NEXT:  entry:
 // OPT-NEXT:    ret void
 //
+// NOUNIBLK: Function Attrs: convergent mustprogress noinline nounwind optnone
+// NOUNIBLK-LABEL: define {{[^@]+}}@_Z4funcv
+// NOUNIBLK-SAME: () #[[ATTR0:[0-9]+]] {
+// NOUNIBLK-NEXT:  entry:
+// NOUNIBLK-NEXT:    ret void
+//
 __device__ void func() {
 
 }
@@ -36,21 +45,34 @@
 // OPT-NEXT:  entry:
 // OPT-NEXT:    ret void
 //
+// NOUNIBLK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// NOUNIBLK-LABEL: define {{[^@]+}}@_Z6kernelv
+// NOUNIBLK-SAME: () #[[ATTR1:[0-9]+]] {
+// NOUNIBLK-NEXT:  entry:
+// NOUNIBLK-NEXT:    ret void
+//
 __global__ void kernel() {
 
 }
 //.
-// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPTNONE: attributes #[[ATTR1]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+//.
+// OPT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPT: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+//.
+// NOUNIBLK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// NOUNIBLK: attributes #[[ATTR1]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 //.
-// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 400}
+// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 //.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
-// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
-// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
+// OPT: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 400}
+// OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 //.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
-// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
-// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
+// NOUNIBLK: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 400}
+// NOUNIBLK: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// NOUNIBLK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 //.
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -7262,6 +7262,8 @@
   if (IsHIP) {
     CmdArgs.push_back("-fcuda-allow-variadic-functions");
     Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);
+    Args.addOptOutFlag(CmdArgs, options::OPT_foffload_uniform_block,
+                       options::OPT_fno_offload_uniform_block);
   }
 
   if (IsCudaDevice || IsHIPDevice) {
Index: clang/lib/CodeGen/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -401,13 +401,6 @@
   if (FD)
     setFunctionDeclAttributes(FD, F, M);
 
-  const bool IsHIPKernel =
-      M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
-
-  // TODO: This should be moved to language specific attributes instead.
-  if (IsHIPKernel)
-    F->addFnAttr("uniform-work-group-size", "true");
-
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
     F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
 
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -2402,6 +2402,10 @@
                                llvm::toStringRef(CodeGenOpts.UniformWGSize));
       }
     }
+
+    if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
+        getLangOpts().OffloadUniformBlock)
+      FuncAttrs.addAttribute("uniform-work-group-size", "true");
   }
 
   // Attach "no-builtins" attributes to:
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1089,6 +1089,10 @@
   NegFlag<SetFalse, [], "Don't specify">,
   BothFlags<[], " that kernel argument names are preserved (HIP only)">>,
   ShouldParseIf<hip.KeyPath>;
+defm offload_uniform_block : BoolFOption<"offload-uniform-block",
+  LangOpts<"OffloadUniformBlock">, DefaultTrue,
+  PosFlag<SetTrue, [], "Assume">, NegFlag<SetFalse, [CC1Option], "Don't assume">,
+  BothFlags<[], " that kernels are launched with uniform block sizes">>;
 def hipspv_pass_plugin_EQ : Joined<["--"], "hipspv-pass-plugin=">,
   Group<Link_Group>, MetaVarName<"<dsopath>">,
   HelpText<"path to a pass plugin for HIP to SPIR-V passes.">;
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -278,6 +278,7 @@
 ENUM_LANGOPT(SYCLVersion  , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")
 
 LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
+LANGOPT(OffloadUniformBlock, 1, 0, "Assume that HIP kernels are launched with uniform block sizes")
 
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to