[clang] [llvm] Boxu.zhang.llvm (PR #140671)

via cfe-commits Mon, 19 May 2025 21:04:30 -0700

https://github.com/boxu-zhang created 
https://github.com/llvm/llvm-project/pull/140671


Last week, someone asking me to implement printf function on OpenCL with NVPTX 
which is strange for me, because I remember that the libdevice of CUDA has 
already implemented it and it should also work for OpenCL. However, it's not 
supported. Testing it with this simple case, the IR showed the kernel calls 
exact the printf function itself which is declared but not defined.
`
int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));

__kernel void test_printf() {
  printf("hello, printf on nvptx.");
}
`

And I did some research, found an obvious bug in 
"Sema::deduceOpenCLAddressSpace". In this function, the first 'if' statement 
checks whether the input 'Decl' type has address space, if it has, then just 
return. This logic is not correct for type of the 'format' string argument of 
'printf' function. The '__constant const char * st' is the argument declaration 
which type is '__constant const char *' and has address space 
already(__constant, LangAS::opencl_constant). Call 'hasAddressSpace()' returns 
false with this type, which is obvious a bug. 
The reason is simple, it should check whether the 'Decl' type is a PointerType 
first, and then check whether the PointeeType has address space instead. The 
address space is only reasonable for PointeeType. 
See the commits for detail.

Also I made some other changes to support implementing builtin printf with 
vprintf on OpenCL when the target is NVPTX.

>From a3e883a4dc55029b8a7e3c591c3bc85289869686 Mon Sep 17 00:00:00 2001
From: "boxu.zhang" <boxu.zh...@hotmail.com>
Date: Wed, 20 Dec 2023 17:35:25 +0800
Subject: [PATCH 1/3] Make 'UnrollMaxUpperBound' to be overridable by target.
 The default value is still 8 and the command line argument
 '--unroll-max-upperbound' takes final effect if provided.

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h | 4 ++++
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp    | 9 ++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h 
b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index f5114fa40c70a..735be3680aea0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -560,6 +560,10 @@ class TargetTransformInfo {
     // (set to UINT_MAX to disable). This does not apply in cases where the
     // loop is being fully unrolled.
     unsigned MaxCount;
+    /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
+    /// to be overrided by a target gives more flexiblity on certain cases.
+    /// By default, MaxUpperBound uses UnrollMaxUpperBound which value is 8.
+    unsigned MaxUpperBound;
     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
     /// applies even if full unrolling is selected. This allows a target to 
fall
     /// back to Partial unrolling if full unrolling is above 
FullUnrollMaxCount.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp 
b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index f14541a1a037e..7cfeb019af972 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -200,6 +200,7 @@ TargetTransformInfo::UnrollingPreferences 
llvm::gatherUnrollingPreferences(
   UP.Count = 0;
   UP.DefaultUnrollRuntimeCount = 8;
   UP.MaxCount = std::numeric_limits<unsigned>::max();
+  UP.MaxUpperBound = UnrollMaxUpperBound;
   UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
   UP.BEInsns = 2;
   UP.Partial = false;
@@ -237,6 +238,8 @@ TargetTransformInfo::UnrollingPreferences 
llvm::gatherUnrollingPreferences(
     UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
   if (UnrollMaxCount.getNumOccurrences() > 0)
     UP.MaxCount = UnrollMaxCount;
+  if (UnrollMaxUpperBound.getNumOccurrences() > 0)
+    UP.MaxUpperBound = UnrollMaxUpperBound;
   if (UnrollFullMaxCount.getNumOccurrences() > 0)
     UP.FullUnrollMaxCount = UnrollFullMaxCount;
   if (UnrollAllowPartial.getNumOccurrences() > 0)
@@ -777,7 +780,7 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
     return TripCount;
 
   if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
-      MaxTripCount <= UnrollMaxUpperBound)
+      MaxTripCount <= UP.MaxUpperBound)
     return MaxTripCount;
 
   // if didn't return until here, should continue to other priorties
@@ -952,7 +955,7 @@ bool llvm::computeUnrollCount(
   // cost of exact full unrolling.  As such, if we have an exact count and
   // found it unprofitable, we'll never chose to bounded unroll.
   if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
-      MaxTripCount <= UnrollMaxUpperBound) {
+      MaxTripCount <= UP.MaxUpperBound) {
     UP.Count = MaxTripCount;
     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
                                              MaxTripCount, UCE, UP)) {
@@ -1026,7 +1029,7 @@ bool llvm::computeUnrollCount(
   }
 
   // Don't unroll a small upper bound loop unless user or TTI asked to do so.
-  if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
+  if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
     UP.Count = 0;
     return false;
   }

>From ecc3e286cfe97ff033c00806658f6e69844c5434 Mon Sep 17 00:00:00 2001
From: "boxu.zhang" <boxu.zh...@hotmail.com>
Date: Tue, 20 May 2025 12:22:45 +0800
Subject: [PATCH 2/3] [Clang][OpenCL][NVPTX] precommit test for using vprintf
 to implement builtin printf on OpenCL with NVPTX

---
 clang/test/CodeGenOpenCL/test-printf-nvptx.cl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 clang/test/CodeGenOpenCL/test-printf-nvptx.cl

diff --git a/clang/test/CodeGenOpenCL/test-printf-nvptx.cl 
b/clang/test/CodeGenOpenCL/test-printf-nvptx.cl
new file mode 100644
index 0000000000000..6b8aa873a47b6
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/test-printf-nvptx.cl
@@ -0,0 +1,14 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -cl-std=CL3.0 -triple nvptx-- -disable-llvm-passes 
-emit-llvm -o - %s | FileCheck -check-prefixes=NV %s
+
+int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));
+
+// NV-LABEL: define dso_local spir_kernel void @test_printf(
+// NV-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !4 
!kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 
!kernel_arg_type_qual !4 {
+// NV-NEXT:  entry:
+// NV-NEXT:    [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr 
addrspace(4) noundef @.str) #[[ATTR2:[0-9]+]]
+// NV-NEXT:    ret void
+//
+__kernel void test_printf() {
+  printf("hello, printf on nvptx.");
+}

>From 64c686105c830eab5a83cbdc62a67b07b0314072 Mon Sep 17 00:00:00 2001
From: "boxu.zhang" <boxu.zh...@hotmail.com>
Date: Tue, 20 May 2025 12:28:02 +0800
Subject: [PATCH 3/3] [Clang][OpenCL][NVPTX] using vprintf to implement builtin
 printf on OpenCL with NVPTX

---
 clang/lib/AST/ASTContext.cpp                  |  5 +++++
 clang/lib/AST/Decl.cpp                        |  4 +++-
 clang/lib/CodeGen/CGGPUBuiltin.cpp            | 11 +++++++++--
 clang/lib/Sema/SemaDecl.cpp                   |  8 ++++++--
 clang/test/CodeGenOpenCL/test-printf-nvptx.cl |  2 +-
 5 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 0395b3e47ab6f..8e3e6d3331b63 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -11481,6 +11481,11 @@ static QualType DecodeTypeFromStr(const char *&Str, 
const ASTContext &Context,
     // FIXME: There's no way to have a built-in with an rvalue ref arg.
     case 'C':
       Type = Type.withConst();
+      // adjust 'const char *' to 'const char __constant *' on OpenCL
+      if (Context.getLangOpts().OpenCL &&
+          Type.getTypePtr() == Context.CharTy.getTypePtr()) {
+        Type = Context.getAddrSpaceQualType(Type, LangAS::opencl_constant);
+      }
       break;
     case 'D':
       Type = Context.getVolatileType(Type);
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c2ea155679193..a0b334e4323c8 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3600,8 +3600,10 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
 
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
+  // EXCEPTION: printf is supported for AMDGPU
   if (Context.getLangOpts().OpenCL &&
-      Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
+      Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID) &&
+      BuiltinID != Builtin::BIprintf)
     return 0;
 
   // CUDA does not have device-side standard library. printf and malloc are the
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp 
b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index e465789a003eb..aa7fa5426bff7 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -135,6 +135,7 @@ bool containsNonScalarVarargs(CodeGenFunction *CGF, const 
CallArgList &Args) {
 RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
                                 llvm::Function *Decl, bool WithSizeArg) {
   CodeGenModule &CGM = CGF->CGM;
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
   CGBuilderTy &Builder = CGF->Builder;
   assert(E->getBuiltinCallee() == Builtin::BIprintf);
   assert(E->getNumArgs() >= 1); // printf always has at least one arg.
@@ -155,9 +156,15 @@ RValue EmitDevicePrintfCallExpr(const CallExpr *E, 
CodeGenFunction *CGF,
 
   auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args);
   llvm::Value *BufferPtr = r.first;
+  llvm::Value *Fmt = Args[0].getRValue(*CGF).getScalarVal();
 
-  llvm::SmallVector<llvm::Value *, 3> Vec = {
-      Args[0].getRValue(*CGF).getScalarVal(), BufferPtr};
+  // For OpenCL, the default addrspace of 'format' argument is 
LangAS::opencl_constant,
+  // however, the 'vprintf' requires it to be unqualified 'ptr' type. Do 
pointer cast if
+  // it's the case.
+  if (CGM.getContext().getLangOpts().OpenCL)
+    Fmt = Builder.CreatePointerCast(Fmt, llvm::PointerType::getUnqual(Ctx));
+
+  llvm::SmallVector<llvm::Value *, 3> Vec = {Fmt, BufferPtr};
   if (WithSizeArg) {
     // Passing > 32bit of data as a local alloca doesn't work for nvptx or
     // amdgpu
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index ffbe317d55999..6792c768977c4 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -7008,8 +7008,12 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) {
 }
 
 void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) {
-  if (Decl->getType().hasAddressSpace())
-    return;
+  // Address space is only meaningful for pointer type
+  if (Decl->getType()->isPointerType()) {
+    const PointerType *T = dyn_cast<PointerType>(Decl->getType().getTypePtr());
+    if (T->getPointeeType().hasAddressSpace())
+      return;
+  }
   if (Decl->getType()->isDependentType())
     return;
   if (VarDecl *Var = dyn_cast<VarDecl>(Decl)) {
diff --git a/clang/test/CodeGenOpenCL/test-printf-nvptx.cl 
b/clang/test/CodeGenOpenCL/test-printf-nvptx.cl
index 6b8aa873a47b6..4a674c396c23c 100644
--- a/clang/test/CodeGenOpenCL/test-printf-nvptx.cl
+++ b/clang/test/CodeGenOpenCL/test-printf-nvptx.cl
@@ -6,7 +6,7 @@ int printf(__constant const char* st, ...) 
__attribute__((format(printf, 1, 2)))
 // NV-LABEL: define dso_local spir_kernel void @test_printf(
 // NV-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !4 
!kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 
!kernel_arg_type_qual !4 {
 // NV-NEXT:  entry:
-// NV-NEXT:    [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr 
addrspace(4) noundef @.str) #[[ATTR2:[0-9]+]]
+// NV-NEXT:    [[TMP0:%.*]] = call i32 @vprintf(ptr addrspacecast (ptr 
addrspace(4) @.str to ptr), ptr null)
 // NV-NEXT:    ret void
 //
 __kernel void test_printf() {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] Boxu.zhang.llvm (PR #140671)

Reply via email to