[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-27 Thread via cfe-commits

llvmbot wrote:



@llvm/pr-subscribers-clang-codegen

@llvm/pr-subscribers-clang

Author: Vikram Hegde (vikramRH)


Changes

Kindly review the top commit here. The builtin-specific changes are up for 
review in a separate patch (https://github.com/llvm/llvm-project/pull/72554).

A few implementation details:
1. Hostcall printf is now the default for both HIP and OpenCL.
2. The implementation adds vector processing support for both the hostcall and 
buffered cases. The vector elements are extracted and pushed onto the buffer 
individually (each aligned to an 8-byte boundary).
3. For the OpenCL hostcall case, the format string pointer is address-space 
cast to the generic address space to be compatible with the hostcall device 
lib functions.

---

Patch is 89.24 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/72556.diff


9 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+8) 
- (modified) clang/lib/AST/Decl.cpp (+7) 
- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+2) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+7-1) 
- (modified) clang/lib/CodeGen/CGGPUBuiltin.cpp (+37-6) 
- (modified) clang/lib/Driver/ToolChains/Clang.cpp (+10) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-printf.cl (+756-1) 
- (modified) llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h (+1-1) 
- (modified) llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp (+90-57) 


``diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..8d51df24c7872b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   ().getLongDoubleFormat() == ::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the 

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-27 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH ready_for_review 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg,
+   bool IsBuffered) {
+  const DataLayout  = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());

vikramRH wrote:

The type cast is necessary for types such as _Float16, which are not handled 
by argument promotion. I have added a test case to demonstrate this.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg,
+   bool IsBuffered) {
+  const DataLayout  = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());
+if (IsBuffered)
+  return Arg;
 return Builder.CreateBitCast(Arg, Int64Ty);
   }
 
-  if (isa(Ty)) {
+  if (!IsBuffered && isa(Ty)) {
 return Builder.CreatePtrToInt(Arg, Int64Ty);

vikramRH wrote:

In the buffered case, the pointer is just pushed onto the buffer as-is. The 
cast is necessary only for the hostcall case, to be compatible with the device 
lib functions.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {

vikramRH wrote:

I have changed this code a little so that only FixedVectorTypes are handled. 
This should be okay, since the OpenCL spec specifically says that only vectors 
of length 2, 3, 4, 8 and 16 are supported for printf.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -194,6 +226,8 @@ static void locateCStrings(SparseBitVector<8> , 
StringRef Str) {
   SpecPos += 2;
   continue;
 }
+if (Str.find_first_of("v", SpecPos) != StringRef::npos)

vikramRH wrote:

Fixed

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+Value* Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.
+  return appendArg(Builder, Desc, Arg, IsLast, IsBuffered);
 }
 
 // Scan the format string to locate all specifiers, and mark the ones that
 // specify a string, i.e, the "%s" specifier with optional '*' characters.
-static void locateCStrings(SparseBitVector<8> , StringRef Str) {
+static void locateCStringsAndVectors(SparseBitVector<8> ,

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -1,12 +1,68 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=buffered -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_BUFFERED %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=hostcall -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_HOSTCALL %s
 
 int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/72556

>From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH 1/4] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 2 ++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..987909b5a62e11b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   ().getLongDoubleFormat() == ::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

>From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Mon, 20 Nov 2023 05:26:27 +
Subject: [PATCH 2/4] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen

---
 clang/lib/CodeGen/CGBuiltin.cpp   |  3 ++-
 clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++--
 clang/lib/Driver/ToolChains/Clang.cpp | 10 ++
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 987909b5a62e11b..8d51df24c7872b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits


@@ -194,6 +226,8 @@ static void locateCStrings(SparseBitVector<8> , 
StringRef Str) {
   SpecPos += 2;
   continue;
 }
+if (Str.find_first_of("v", SpecPos) != StringRef::npos)

ssahasra wrote:

I don't think this will work as expected. It can clearly match a "v" that 
occurs after the data type. For example, it is supposed to match "%v2d", but it 
will also match "%d v". The match should be performed inside the "Spec" 
substring created below. 

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits


@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+Value* Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.

ssahasra wrote:

This is a pure whitespace change. Keeping the original formatting of the 
comment helps simplify the diff.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits


@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {

ssahasra wrote:

So the argument is ignored if it is not a FixedVectorType?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits


@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg,
+   bool IsBuffered) {
+  const DataLayout  = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());

ssahasra wrote:

This typecast should not be necessary. Default argument promotions in C++ for 
variadic functions ensure that all floating point arguments are promoted to 
double. If that is not happening, can you demonstrate with a test?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits


@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation , const JobAction 
,
 Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
   }
 
+  if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
+if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
+  CmdArgs.push_back(Args.MakeArgString(
+  "-mprintf-kind=" +
+  Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
+  // Force compiler error on invalid conversion specifiers
+  
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));

ssahasra wrote:

Why is this necessary here?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart(
 StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
   }
 } else {
-  int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
+  int AllocSize = 0;
+  if (OCLVectors.test(i)) {
+auto VecArg = dyn_cast(Args[i]->getType());
+assert(VecArg && "invalid vector specifier");

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+auto Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
-  }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+  } else if (IsVector) {

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+auto Val =

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/72556

>From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH 1/3] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 2 ++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..987909b5a62e11b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   ().getLongDoubleFormat() == ::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

>From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Mon, 20 Nov 2023 05:26:27 +
Subject: [PATCH 2/3] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen

---
 clang/lib/CodeGen/CGBuiltin.cpp   |  3 ++-
 clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++--
 clang/lib/Driver/ToolChains/Clang.cpp | 10 ++
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 987909b5a62e11b..8d51df24c7872b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread Matt Arsenault via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+auto Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
-  }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+  } else if (IsVector) {

arsenm wrote:

No else after return 

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread Matt Arsenault via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+auto Val =

arsenm wrote:

Value *

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread Matt Arsenault via cfe-commits


@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart(
 StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
   }
 } else {
-  int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
+  int AllocSize = 0;
+  if (OCLVectors.test(i)) {
+auto VecArg = dyn_cast(Args[i]->getType());
+assert(VecArg && "invalid vector specifier");

arsenm wrote:

cast<> instead of dyn_cast + assert 

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff cb4627d15027e1ed0051f5e5af447602f0f60971 
f9329597564d4e3390f6d0d3a08e4a6f66b52de4 -- clang/lib/AST/Decl.cpp 
clang/lib/Basic/Targets/AMDGPU.cpp clang/lib/CodeGen/CGBuiltin.cpp 
clang/lib/CodeGen/CGGPUBuiltin.cpp clang/lib/Driver/ToolChains/Clang.cpp 
llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
``





View the diff from clang-format here.


``diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8d51df24c7..d21bbf9b5f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,10 +2458,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const 
GlobalDecl GD, unsigned BuiltinID,
   ().getLongDoubleFormat() == ::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
-   // Mutate the printf builtin ID so that we use the same CodeGen path for
-   // HIP and OpenCL with AMDGPU targets.
-   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
- BuiltinID = Builtin::BIprintf;
+  // Mutate the printf builtin ID so that we use the same CodeGen path for
+  // HIP and OpenCL with AMDGPU targets.
+  if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+BuiltinID = Builtin::BIprintf;
 
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
@@ -5623,7 +5623,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   if (getTarget().getTriple().isNVPTX())
 return EmitNVPTXDevicePrintfCallExpr(E);
   if (getTarget().getTriple().isAMDGCN() &&
- (getLangOpts().HIP || getLangOpts().OpenCL))
+  (getLangOpts().HIP || getLangOpts().OpenCL))
 return EmitAMDGPUDevicePrintfCallExpr(E);
 }
 
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp 
b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index 81e23bc325..5a903fb1da 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -217,8 +217,7 @@ RValue 
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
   }
 
   auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal;
-  bool isBuffered =
-   (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered);
+  bool isBuffered = (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered);
   auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered);
   Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
   return RValue::get(Printf);

``




https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH created 
https://github.com/llvm/llvm-project/pull/72556

Kindly review the top commit here. The builtin-specific changes are up for review in a 
separate patch (https://github.com/llvm/llvm-project/pull/72554).

Few implementation details,
1. Hostcall printf is now default for both HIP and OpenCL.
2. The implementation adds vector processing support for both the hostcall and 
buffered cases. The vector elements are extracted and pushed onto the buffer 
individually (each aligned to an 8-byte boundary).
3. For the OpenCL hostcall case, the format string pointer is addrspace-cast to 
the generic address space to be compatible with the hostcall device library functions.

>From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH 1/2] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 2 ++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..987909b5a62e11b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   ().getLongDoubleFormat() == ::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

>From f9329597564d4e3390f6d0d3a08e4a6f66b52de4 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Wed, 15 Nov 2023 01:20:55 -0500
Subject: [PATCH 2/2]