jdoerfert updated this revision to Diff 382171.
jdoerfert added a comment.

Actually use the new wrapper for OpenMP offloading to AMD GPUs (with the new device runtime).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112504/new/

https://reviews.llvm.org/D112504

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGGPUBuiltin.cpp
  openmp/libomptarget/DeviceRTL/include/Debug.h
  openmp/libomptarget/DeviceRTL/include/Interface.h
  openmp/libomptarget/DeviceRTL/src/Debug.cpp

Index: openmp/libomptarget/DeviceRTL/src/Debug.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Debug.cpp
+++ openmp/libomptarget/DeviceRTL/src/Debug.cpp
@@ -38,6 +38,15 @@
          assertion);
   __builtin_trap();
 }
+
+// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+int32_t vprintf(const char *, void *) { return 0; }
+#pragma omp end declare variant
+
+int32_t __llvm_omp_vprintf(const char *Format, void *Arguments) {
+  return vprintf(Format, Arguments);
+}
 }
 
 /// Current indentation level for the function trace. Only accessed by thread 0.
Index: openmp/libomptarget/DeviceRTL/include/Interface.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/Interface.h
+++ openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -340,6 +340,9 @@
 int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
 int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
 ///}
+
+/// Printf
+int32_t __llvm_omp_vprintf(const char *Format, void *Arguments);
 }
 
 #endif
Index: openmp/libomptarget/DeviceRTL/include/Debug.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/Debug.h
+++ openmp/libomptarget/DeviceRTL/include/Debug.h
@@ -46,17 +46,12 @@
 /// macro.
 /// {
 
-#ifndef __AMDGCN__
 extern "C" {
 int printf(const char *format, ...);
 }
 
 #define PRINTF(fmt, ...) (void)printf(fmt, __VA_ARGS__)
 #define PRINT(str) PRINTF("%s", str)
-#else
-#define PRINTF(fmt, ...)
-#define PRINT(str)
-#endif
 
 #define WARN(fmt, ...) PRINTF("WARNING: " #fmt, __VA_ARGS__)
 
Index: clang/lib/CodeGen/CGGPUBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -21,24 +21,30 @@
 using namespace clang;
 using namespace CodeGen;
 
-static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
+static llvm::Function *GetVprintfDeclaration(CodeGenModule &CGM) {
+  bool UsesNewOpenMPDeviceRuntime = CGM.getLangOpts().OpenMPIsDevice &&
+                                    CGM.getLangOpts().OpenMPTargetNewRuntime;
+  const char *Name =
+      UsesNewOpenMPDeviceRuntime ? "__llvm_omp_vprintf" : "vprintf";
+  llvm::Module &M = CGM.getModule();
   llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
                             llvm::Type::getInt8PtrTy(M.getContext())};
   llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
       llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
 
-  if (auto* F = M.getFunction("vprintf")) {
+  if (auto *F = M.getFunction(Name)) {
     // Our CUDA system header declares vprintf with the right signature, so
     // nobody else should have been able to declare vprintf with a bogus
-    // signature.
+    // signature. The OpenMP device runtime provides a wrapper around vprintf,
+    // which we use here instead; its signature is expected to match as well.
     assert(F->getFunctionType() == VprintfFuncType);
     return F;
   }
 
-  // vprintf doesn't already exist; create a declaration and insert it into the
-  // module.
+  // vprintf, or for OpenMP device offloading the vprintf wrapper, doesn't
+  // already exist; create a declaration and insert it into the module.
   return llvm::Function::Create(
-      VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
+      VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
 }
 
 // Transforms a call to printf into a call to the NVPTX vprintf syscall (which
@@ -117,7 +123,7 @@
   }
 
   // Invoke vprintf and return.
-  llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
+  llvm::Function *VprintfFunc = GetVprintfDeclaration(CGM);
   return RValue::get(Builder.CreateCall(
       VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
 }
@@ -130,6 +136,12 @@
          E->getBuiltinCallee() == Builtin::BI__builtin_printf);
   assert(E->getNumArgs() >= 1); // printf always has at least one arg.
 
+  // For OpenMP target offloading we reuse the NVPTX printf lowering, which
+  // emits a call to __llvm_omp_vprintf with the arguments and leaves the
+  // details to the device runtime itself.
+  if (getLangOpts().OpenMPIsDevice && getLangOpts().OpenMPTargetNewRuntime)
+    return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
+
   CallArgList CallArgs;
   EmitCallArgs(CallArgs,
                E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -5027,7 +5027,8 @@
     if (getTarget().getTriple().isNVPTX())
       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
     if (getTarget().getTriple().getArch() == Triple::amdgcn &&
-        getLangOpts().HIP)
+        (getLangOpts().HIP || (getLangOpts().OpenMPIsDevice &&
+                               getLangOpts().OpenMPTargetNewRuntime)))
       return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
     break;
   case Builtin::BI__builtin_canonicalize:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to