yaxunl created this revision.
yaxunl added reviewers: tra, b-sumner.
Herald added a project: All.
yaxunl requested review of this revision.
Herald added a subscriber: MaskRay.

Add option -fhip-kernel-arg-name to emit kernel argument
name metadata, which is needed for certain HIP applications.


https://reviews.llvm.org/D128022

Files:
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGDeclCXX.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/kernel-arg-name-metadata.cu
  clang/test/Driver/hip-options.hip

Index: clang/test/Driver/hip-options.hip
===================================================================
--- clang/test/Driver/hip-options.hip
+++ clang/test/Driver/hip-options.hip
@@ -116,3 +116,10 @@
 // RUN:   --cuda-gpu-arch=gfx906 -Xoffload-linker --build-id=md5 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=OFL-LINK %s
 // OFL-LINK: lld{{.*}}"--build-id=md5"
+
+// Check -fhip-kernel-arg-name is passed to clang -cc1.
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN:   --offload-arch=gfx906 -fhip-kernel-arg-name %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=KAN %s
+// KAN: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fhip-kernel-arg-name"
Index: clang/test/CodeGenCUDA/kernel-arg-name-metadata.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/kernel-arg-name-metadata.cu
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fhip-kernel-arg-name \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     | FileCheck %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     | FileCheck -check-prefix=NEG %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: define{{.*}} amdgpu_kernel void @_Z6kerneliPf({{.*}} !kernel_arg_name [[MD:![0-9]+]]
+// NEG-NOT: define{{.*}} amdgpu_kernel void @_Z6kerneliPf({{.*}} !kernel_arg_name
+__global__ void kernel(int arg1, float *arg2) {
+}
+
+// CHECK: [[MD]] = !{!"arg1", !"arg2"}
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6350,6 +6350,9 @@
     if (Args.hasFlag(options::OPT_fgpu_allow_device_init,
                      options::OPT_fno_gpu_allow_device_init, false))
       CmdArgs.push_back("-fgpu-allow-device-init");
+    if (Args.hasFlag(options::OPT_fhip_kernel_arg_name,
+                     options::OPT_fno_hip_kernel_arg_name, false))
+      CmdArgs.push_back("-fhip-kernel-arg-name");
   }
 
   if (IsCuda || IsHIP) {
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1462,7 +1462,7 @@
   /// \param FN is a pointer to IR function being generated.
   /// \param FD is a pointer to function declaration if any.
   /// \param CGF is a pointer to CodeGenFunction that generates this function.
-  void GenOpenCLArgMetadata(llvm::Function *FN,
+  void GenKernelArgMetadata(llvm::Function *FN,
                             const FunctionDecl *FD = nullptr,
                             CodeGenFunction *CGF = nullptr);
 
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1703,7 +1703,7 @@
   }
 }
 
-void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
+void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn,
                                          const FunctionDecl *FD,
                                          CodeGenFunction *CGF) {
   assert(((FD && CGF) || (!FD && !CGF)) &&
@@ -1735,6 +1735,11 @@
   if (FD && CGF)
     for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) {
       const ParmVarDecl *parm = FD->getParamDecl(i);
+      // Get argument name.
+      argNames.push_back(llvm::MDString::get(VMContext, parm->getName()));
+
+      if (!getLangOpts().OpenCL)
+        continue;
       QualType ty = parm->getType();
       std::string typeQuals;
 
@@ -1753,9 +1758,6 @@
       } else
         accessQuals.push_back(llvm::MDString::get(VMContext, "none"));
 
-      // Get argument name.
-      argNames.push_back(llvm::MDString::get(VMContext, parm->getName()));
-
       auto getTypeSpelling = [&](QualType Ty) {
         auto typeName = Ty.getUnqualifiedType().getAsString(Policy);
 
@@ -1828,17 +1830,20 @@
       argTypeQuals.push_back(llvm::MDString::get(VMContext, typeQuals));
     }
 
-  Fn->setMetadata("kernel_arg_addr_space",
-                  llvm::MDNode::get(VMContext, addressQuals));
-  Fn->setMetadata("kernel_arg_access_qual",
-                  llvm::MDNode::get(VMContext, accessQuals));
-  Fn->setMetadata("kernel_arg_type",
-                  llvm::MDNode::get(VMContext, argTypeNames));
-  Fn->setMetadata("kernel_arg_base_type",
-                  llvm::MDNode::get(VMContext, argBaseTypeNames));
-  Fn->setMetadata("kernel_arg_type_qual",
-                  llvm::MDNode::get(VMContext, argTypeQuals));
-  if (getCodeGenOpts().EmitOpenCLArgMetadata)
+  if (getLangOpts().OpenCL) {
+    Fn->setMetadata("kernel_arg_addr_space",
+                    llvm::MDNode::get(VMContext, addressQuals));
+    Fn->setMetadata("kernel_arg_access_qual",
+                    llvm::MDNode::get(VMContext, accessQuals));
+    Fn->setMetadata("kernel_arg_type",
+                    llvm::MDNode::get(VMContext, argTypeNames));
+    Fn->setMetadata("kernel_arg_base_type",
+                    llvm::MDNode::get(VMContext, argBaseTypeNames));
+    Fn->setMetadata("kernel_arg_type_qual",
+                    llvm::MDNode::get(VMContext, argTypeQuals));
+  }
+  if (getCodeGenOpts().EmitOpenCLArgMetadata ||
+      getCodeGenOpts().HIPSaveKernelArgName)
     Fn->setMetadata("kernel_arg_name",
                     llvm::MDNode::get(VMContext, argNames));
 }
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -1976,8 +1976,7 @@
 
   /// Add OpenCL kernel arg metadata and the kernel attribute metadata to
   /// the function metadata.
-  void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
-                                llvm::Function *Fn);
+  void EmitKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn);
 
 public:
   CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false);
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -597,15 +597,17 @@
                             "decoded_addr");
 }
 
-void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
-                                               llvm::Function *Fn)
-{
-  if (!FD->hasAttr<OpenCLKernelAttr>())
+void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
+                                         llvm::Function *Fn) {
+  if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>())
     return;
 
   llvm::LLVMContext &Context = getLLVMContext();
 
-  CGM.GenOpenCLArgMetadata(Fn, FD, this);
+  CGM.GenKernelArgMetadata(Fn, FD, this);
+
+  if (!getLangOpts().OpenCL)
+    return;
 
   if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
     QualType HintQTy = A->getTypeHint();
@@ -920,9 +922,10 @@
   if (D && D->hasAttr<NoProfileFunctionAttr>())
     Fn->addFnAttr(llvm::Attribute::NoProfile);
 
-  if (FD && getLangOpts().OpenCL) {
+  if (FD && (getLangOpts().OpenCL ||
+             (getLangOpts().HIP && getLangOpts().CUDAIsDevice))) {
     // Add metadata for a kernel function.
-    EmitOpenCLKernelMetadata(FD, Fn);
+    EmitKernelMetadata(FD, Fn);
   }
 
   // If we are checking function types, emit a function type signature as
Index: clang/lib/CodeGen/CGDeclCXX.cpp
===================================================================
--- clang/lib/CodeGen/CGDeclCXX.cpp
+++ clang/lib/CodeGen/CGDeclCXX.cpp
@@ -707,7 +707,7 @@
   // dynamic resource allocation on the device and program scope variables are
   // destroyed by the runtime when program is released.
   if (getLangOpts().OpenCL) {
-    GenOpenCLArgMetadata(Fn);
+    GenKernelArgMetadata(Fn);
     Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
   }
 
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1013,6 +1013,12 @@
   BothFlags<[], " that single precision floating-point divide and sqrt used in "
   "the program source are correctly rounded (HIP device compilation only)">>,
   ShouldParseIf<hip.KeyPath>;
+defm hip_kernel_arg_name : BoolFOption<"hip-kernel-arg-name",
+  CodeGenOpts<"HIPSaveKernelArgName">, DefaultFalse,
+  PosFlag<SetTrue, [CC1Option], "Specify">,
+  NegFlag<SetFalse, [], "Don't specify">,
+  BothFlags<[], " that kernel argument names are preserved (HIP only)">>,
+  ShouldParseIf<hip.KeyPath>;
 def hipspv_pass_plugin_EQ : Joined<["--"], "hipspv-pass-plugin=">,
   Group<Link_Group>, MetaVarName<"<dsopath>">,
   HelpText<"path to a pass plugin for HIP to SPIR-V passes.">;
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -189,6 +189,7 @@
 CODEGENOPT(NullPointerIsValid , 1, 0) ///< Assume Null pointer deference is defined.
 CODEGENOPT(OpenCLCorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
 CODEGENOPT(HIPCorrectlyRoundedDivSqrt, 1, 1) ///< -fno-hip-fp32-correctly-rounded-divide-sqrt
+CODEGENOPT(HIPSaveKernelArgName, 1, 0) ///< Set when -fhip-kernel-arg-name is enabled.
 CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.
 CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to