[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Vikram Hegde (vikramRH) Changes Kindly review the top commit here. The builtin-specific changes are up for review in a separate patch (https://github.com/llvm/llvm-project/pull/72554). A few implementation details: 1. Hostcall printf is now the default for both HIP and OpenCL. 2. The implementation adds vector processing support for both the hostcall and buffered cases. The vector elements are extracted and pushed onto the buffer individually (each aligned to an 8-byte boundary). 3. For the OpenCL hostcall case, the format string pointer is addrspace-cast to the generic address space to be compatible with the hostcall device lib functions. --- Patch is 89.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72556.diff 9 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+8) - (modified) clang/lib/AST/Decl.cpp (+7) - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+2) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+7-1) - (modified) clang/lib/CodeGen/CGGPUBuiltin.cpp (+37-6) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+10) - (modified) clang/test/CodeGenOpenCL/amdgpu-printf.cl (+756-1) - (modified) llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h (+1-1) - (modified) llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp (+90-57) ``diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. 
//===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. 
if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..8d51df24c7872b7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ().getLongDoubleFormat() == ::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH ready_for_review https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -26,28 +26,31 @@ using namespace llvm; #define DEBUG_TYPE "amdgpu-emit-printf" -static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) { +static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg, + bool IsBuffered) { + const DataLayout = Builder.GetInsertBlock()->getModule()->getDataLayout(); auto Int64Ty = Builder.getInt64Ty(); auto Ty = Arg->getType(); if (auto IntTy = dyn_cast(Ty)) { -switch (IntTy->getBitWidth()) { -case 32: - return Builder.CreateZExt(Arg, Int64Ty); -case 64: - return Arg; +if (IntTy->getBitWidth() < 64) { + return Builder.CreateZExt(Arg, Builder.getInt64Ty()); } } - if (Ty->getTypeID() == Type::DoubleTyID) { + if (Ty->isFloatingPointTy()) { +if (DL.getTypeAllocSize(Ty) < 8) + Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy()); vikramRH wrote: The type cast is necessary for types such as _Float16, which are not handled by default argument promotion. I have added a test case to demonstrate this. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -26,28 +26,31 @@ using namespace llvm; #define DEBUG_TYPE "amdgpu-emit-printf" -static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) { +static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg, + bool IsBuffered) { + const DataLayout = Builder.GetInsertBlock()->getModule()->getDataLayout(); auto Int64Ty = Builder.getInt64Ty(); auto Ty = Arg->getType(); if (auto IntTy = dyn_cast(Ty)) { -switch (IntTy->getBitWidth()) { -case 32: - return Builder.CreateZExt(Arg, Int64Ty); -case 64: - return Arg; +if (IntTy->getBitWidth() < 64) { + return Builder.CreateZExt(Arg, Builder.getInt64Ty()); } } - if (Ty->getTypeID() == Type::DoubleTyID) { + if (Ty->isFloatingPointTy()) { +if (DL.getTypeAllocSize(Ty) < 8) + Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy()); +if (IsBuffered) + return Arg; return Builder.CreateBitCast(Arg, Int64Ty); } - if (isa(Ty)) { + if (!IsBuffered && isa(Ty)) { return Builder.CreatePtrToInt(Arg, Int64Ty); vikramRH wrote: The pointer is just pushed onto the buffer. The cast is necessary for the hostcall case to be compatible with device lib functions https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { vikramRH wrote: I have changed this code a little now so that only FixedVectorTypes are handled. This should be okay since the OCL specs specifically say only vectors of length 2,3,4,8 and 16 are supported for printf. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -194,6 +226,8 @@ static void locateCStrings(SparseBitVector<8> , StringRef Str) { SpecPos += 2; continue; } +if (Str.find_first_of("v", SpecPos) != StringRef::npos) vikramRH wrote: Fixed https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +Value* Val = +Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1); +return callAppendArgs(Builder, Desc, 1, + fitArgInto64Bits(Builder, Val, IsBuffered), Zero, + Zero, Zero, Zero, Zero, Zero, IsLast); + } + return nullptr; +} + static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg, - bool SpecIsCString, bool IsLast) { + bool SpecIsCString, bool IsVector, bool IsLast, + bool IsBuffered) { if (SpecIsCString && isa(Arg->getType())) { return appendString(Builder, Desc, Arg, IsLast); } - // If the format specifies a string but the argument is not, the frontend will - // have printed a warning. We just rely on undefined behaviour and send the - // argument anyway. - return appendArg(Builder, Desc, Arg, IsLast); + + if (IsVector) { +return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered); + } + + // If the format specifies a string but the argument is not, the frontend + // will have printed a warning. We just rely on undefined behaviour and send + // the argument anyway. + return appendArg(Builder, Desc, Arg, IsLast, IsBuffered); } // Scan the format string to locate all specifiers, and mark the ones that // specify a string, i.e, the "%s" specifier with optional '*' characters. 
-static void locateCStrings(SparseBitVector<8> , StringRef Str) { +static void locateCStringsAndVectors(SparseBitVector<8> , vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -1,12 +1,68 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -mprintf-kind=buffered -disable-llvm-passes -emit-llvm -o - %s | FileCheck --check-prefix=CHECK_BUFFERED %s +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -mprintf-kind=hostcall -disable-llvm-passes -emit-llvm -o - %s | FileCheck --check-prefix=CHECK_HOSTCALL %s int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/72556 >From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH 1/4] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. 
//===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. 
if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..987909b5a62e11b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ().getLongDoubleFormat() == ::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. 
Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are >From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001 From: Vikram Date: Mon, 20 Nov 2023 05:26:27 + Subject: [PATCH 2/4] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen --- clang/lib/CodeGen/CGBuiltin.cpp | 3 ++- clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++-- clang/lib/Driver/ToolChains/Clang.cpp | 10 ++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 987909b5a62e11b..8d51df24c7872b7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -194,6 +226,8 @@ static void locateCStrings(SparseBitVector<8> , StringRef Str) { SpecPos += 2; continue; } +if (Str.find_first_of("v", SpecPos) != StringRef::npos) ssahasra wrote: I don't think this will work as expected. It can clearly match a "v" that occurs after the data type. For example, it is supposed to match "%v2d", but it will also match "%d v". The match should be performed inside the "Spec" substring created below. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +Value* Val = +Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1); +return callAppendArgs(Builder, Desc, 1, + fitArgInto64Bits(Builder, Val, IsBuffered), Zero, + Zero, Zero, Zero, Zero, Zero, IsLast); + } + return nullptr; +} + static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg, - bool SpecIsCString, bool IsLast) { + bool SpecIsCString, bool IsVector, bool IsLast, + bool IsBuffered) { if (SpecIsCString && isa(Arg->getType())) { return appendString(Builder, Desc, Arg, IsLast); } - // If the format specifies a string but the argument is not, the frontend will - // have printed a warning. We just rely on undefined behaviour and send the - // argument anyway. - return appendArg(Builder, Desc, Arg, IsLast); + + if (IsVector) { +return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered); + } + + // If the format specifies a string but the argument is not, the frontend + // will have printed a warning. We just rely on undefined behaviour and send + // the argument anyway. ssahasra wrote: This is a pure whitespace change. Keeping the original formatting of the comment helps simplify the diff. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { ssahasra wrote: So the argument is ignored if it is not a FixedVectorType? https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -26,28 +26,31 @@ using namespace llvm; #define DEBUG_TYPE "amdgpu-emit-printf" -static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) { +static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg, + bool IsBuffered) { + const DataLayout = Builder.GetInsertBlock()->getModule()->getDataLayout(); auto Int64Ty = Builder.getInt64Ty(); auto Ty = Arg->getType(); if (auto IntTy = dyn_cast(Ty)) { -switch (IntTy->getBitWidth()) { -case 32: - return Builder.CreateZExt(Arg, Int64Ty); -case 64: - return Arg; +if (IntTy->getBitWidth() < 64) { + return Builder.CreateZExt(Arg, Builder.getInt64Ty()); } } - if (Ty->getTypeID() == Type::DoubleTyID) { + if (Ty->isFloatingPointTy()) { +if (DL.getTypeAllocSize(Ty) < 8) + Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy()); ssahasra wrote: This typecast should not be necessary. Default argument promotions in C++ for variadic functions ensure that all floating point arguments are promoted to double. If that is not happening, can you demonstrate with a test? https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation , const JobAction , Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path); } + if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) { +if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) { + CmdArgs.push_back(Args.MakeArgString( + "-mprintf-kind=" + + Args.getLastArgValue(options::OPT_mprintf_kind_EQ))); + // Force compiler error on invalid conversion specifiers + CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier")); ssahasra wrote: Why is this necessary here? https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart( StringData(StringRef(), LenWithNull, LenWithNullAligned, false)); } } else { - int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType()); + int AllocSize = 0; + if (OCLVectors.test(i)) { +auto VecArg = dyn_cast(Args[i]->getType()); +assert(VecArg && "invalid vector specifier"); vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +auto Val = +Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1); +return callAppendArgs(Builder, Desc, 1, + fitArgInto64Bits(Builder, Val, IsBuffered), Zero, + Zero, Zero, Zero, Zero, Zero, IsLast); + } + return nullptr; +} + static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg, - bool SpecIsCString, bool IsLast) { + bool SpecIsCString, bool IsVector, bool IsLast, + bool IsBuffered) { if (SpecIsCString && isa(Arg->getType())) { return appendString(Builder, Desc, Arg, IsLast); - } - // If the format specifies a string but the argument is not, the frontend will - // have printed a warning. We just rely on undefined behaviour and send the - // argument anyway. - return appendArg(Builder, Desc, Arg, IsLast); + } else if (IsVector) { vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +auto Val = vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/72556 >From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH 1/3] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. 
//===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. 
if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..987909b5a62e11b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ().getLongDoubleFormat() == ::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. 
Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are >From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001 From: Vikram Date: Mon, 20 Nov 2023 05:26:27 + Subject: [PATCH 2/3] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen --- clang/lib/CodeGen/CGBuiltin.cpp | 3 ++- clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++-- clang/lib/Driver/ToolChains/Clang.cpp | 10 ++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 987909b5a62e11b..8d51df24c7872b7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +auto Val = +Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1); +return callAppendArgs(Builder, Desc, 1, + fitArgInto64Bits(Builder, Val, IsBuffered), Zero, + Zero, Zero, Zero, Zero, Zero, IsLast); + } + return nullptr; +} + static Value *processArg(IRBuilder<> , Value *Desc, Value *Arg, - bool SpecIsCString, bool IsLast) { + bool SpecIsCString, bool IsVector, bool IsLast, + bool IsBuffered) { if (SpecIsCString && isa(Arg->getType())) { return appendString(Builder, Desc, Arg, IsLast); - } - // If the format specifies a string but the argument is not, the frontend will - // have printed a warning. We just rely on undefined behaviour and send the - // argument anyway. - return appendArg(Builder, Desc, Arg, IsLast); + } else if (IsVector) { arsenm wrote: No else after return https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +auto Val = arsenm wrote: Value * https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart( StringData(StringRef(), LenWithNull, LenWithNullAligned, false)); } } else { - int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType()); + int AllocSize = 0; + if (OCLVectors.test(i)) { +auto VecArg = dyn_cast(Args[i]->getType()); +assert(VecArg && "invalid vector specifier"); arsenm wrote: cast<> instead of dyn_cast + assert https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff cb4627d15027e1ed0051f5e5af447602f0f60971 f9329597564d4e3390f6d0d3a08e4a6f66b52de4 -- clang/lib/AST/Decl.cpp clang/lib/Basic/Targets/AMDGPU.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CGGPUBuiltin.cpp clang/lib/Driver/ToolChains/Clang.cpp llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8d51df24c7..d21bbf9b5f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,10 +2458,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ().getLongDoubleFormat() == ::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); - // Mutate the printf builtin ID so that we use the same CodeGen path for - // HIP and OpenCL with AMDGPU targets. - if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) - BuiltinID = Builtin::BIprintf; + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) +BuiltinID = Builtin::BIprintf; // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. 
Ideally we should communicate the @@ -5623,7 +5623,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E); if (getTarget().getTriple().isAMDGCN() && - (getLangOpts().HIP || getLangOpts().OpenCL)) + (getLangOpts().HIP || getLangOpts().OpenCL)) return EmitAMDGPUDevicePrintfCallExpr(E); } diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index 81e23bc325..5a903fb1da 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -217,8 +217,7 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { } auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal; - bool isBuffered = - (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered); + bool isBuffered = (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered); auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered); Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); return RValue::get(Printf); `` https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH created https://github.com/llvm/llvm-project/pull/72556 Kindly review top commit here, The builtin specific changes are up for review in a separate patch (https://github.com/llvm/llvm-project/pull/72554) Few implementation details, 1. Hostcall printf is now default for both HIP and OpenCL. 2. The implementation adds vector processing support both for hostcall and buffered cases. The vector elements are extracted and pushed onto the buffer individually (each aligned to 8 byte boundary) 3. for OpenCL hostcall case, The format string pointer is addrspace casted to generic address space to be compatible with hostcall device lib functions. >From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH 1/2] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. 
//===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. 
if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..987909b5a62e11b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ().getLongDoubleFormat() == ::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are >From f9329597564d4e3390f6d0d3a08e4a6f66b52de4 Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 15 Nov 2023 01:20:55 -0500 Subject: [PATCH 2/2]