[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-16 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGed181efa175d: [HIP][AMDGPU] expand printf when compiling HIP 
to AMDGPU (authored by sameerds).
Herald added a subscriber: kerbowa.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGGPUBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/CodeGenHIP/printf-aggregate.cpp
  clang/test/CodeGenHIP/printf.cpp
  clang/test/Driver/hip-printf.hip
  llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
  llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
  llvm/lib/Transforms/Utils/CMakeLists.txt

Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMTransformUtils
+  AMDGPUEmitPrintf.cpp
   ASanStackFrameLayout.cpp
   AddDiscriminators.cpp
   BasicBlockUtils.cpp
Index: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
===
--- /dev/null
+++ llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,246 @@
+//===- AMDGPUEmitPrintf.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===--===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include 
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static bool isCString(const Value *Arg) {
+  auto Ty = Arg->getType();
+  auto PtrTy = dyn_cast(Ty);
+  if (!PtrTy)
+return false;
+
+  auto IntTy = dyn_cast(PtrTy->getElementType());
+  if (!IntTy)
+return false;
+
+  return IntTy->getBitWidth() == 8;
+}
+
+static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Ty = Arg->getType();
+
+  if (auto IntTy = dyn_cast(Ty)) {
+switch (IntTy->getBitWidth()) {
+case 32:
+  return Builder.CreateZExt(Arg, Int64Ty);
+case 64:
+  return Arg;
+}
+  }
+
+  if (Ty->getTypeID() == Type::DoubleTyID) {
+return Builder.CreateBitCast(Arg, Int64Ty);
+  }
+
+  if (auto PtrTy = dyn_cast(Ty)) {
+return Builder.CreatePtrToInt(Arg, Int64Ty);
+  }
+
+  llvm_unreachable("unexpected type");
+}
+
+static Value *callPrintfBegin(IRBuilder<> , Value *Version) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+  return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> , Value *Desc, int NumArgs,
+ Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, Value *Arg6,
+ bool IsLast) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Int32Ty = Builder.getInt32Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+   Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+   Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+  auto IsLastValue = Builder.getInt32(IsLast);
+  auto NumArgsValue = Builder.getInt32(NumArgs);
+  return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+ Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> , Value *Desc, Value *Arg,
+bool IsLast) {
+  auto Arg0 = fitArgInto64Bits(Builder, Arg);
+  auto Zero = Builder.getInt64(0);
+  return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+Zero, IsLast);
+}
+
+// The device library does not provide strlen, so we build our own loop
+// here. While we are at it, we also include the terminating null in the length.
+static Value *getStrlenWithNull(IRBuilder<> , Value *Str) {
+  auto *Prev = Builder.GetInsertBlock();
+  Module *M = Prev->getModule();
+
+  auto CharZero = Builder.getInt8(0);
+  auto One = Builder.getInt64(1);
+  auto Zero = 

[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-15 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm accepted this revision.
arsenm added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-14 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
sameerds marked 2 inline comments as done.
sameerds added inline comments.



Comment at: clang/test/CodeGenHIP/printf.cpp:18
+}
+
+// CHECK: [[BEGIN:%.*]]   = call i64 @__ockl_printf_begin(i64 0)

arsenm wrote:
> sameerds wrote:
> > sameerds wrote:
> > > arsenm wrote:
> > > > This could use a lot more testcases. Can you add some half, float, and 
> > > > double as well as pointers (including different address spaces) and 
> > > > vectors?
> > > I am not sure what exactly should be tested. The validity of this 
> > > expansion depends on the signature of the builtin printf function. Since 
> > > printf is variadic, the "default argument promotions" in the C/C++ spec 
> > > guarantee that the arguments are 32/64 bit integers or doubles if they 
> > > are not pointers. The printf signature as well as the device library 
> > > functions are defined using only generic pointers, so the address space 
> > > does not matter. Non-scalar arguments are not supported, which is checked 
> > > by another test using a struct. I could add a vector there, but that 
> > > doesn't seem to be adding any value.
> > Bump!
> The address space shouldn't matter, but it's good to make sure it doesn't. 
> 
> Vector arguments are 100% supported in OpenCL printf, and are not subject to 
> argument promotion. You have to use a width modifier for them though.
Added a test with address spaces on the pointer arguments.

Vectors are supported in OpenCL, but this initial implementation is 
specifically for HIP. The printf expansion is invoked by Clang CodeGen only 
when the language is HIP.

Vector support will arrive later. It's not sufficient to just implement the 
transmission via hostcall; it also needs changes to the printf processing 
performed by the hostcall listener thread on the host.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-14 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
sameerds updated this revision to Diff 237864.
sameerds added a comment.

- Added a test for address spaces
- Removed an unnecessary addrspacecast in the strlen expansion
- Updated the tests to pass -fcuda-is-device
- printf is not a builtin for device code, so removed the code and tests 
related to that


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGGPUBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/CodeGenHIP/printf-aggregate.cpp
  clang/test/CodeGenHIP/printf.cpp
  clang/test/Driver/hip-printf.hip
  llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
  llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
  llvm/lib/Transforms/Utils/CMakeLists.txt

Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMTransformUtils
+  AMDGPUEmitPrintf.cpp
   ASanStackFrameLayout.cpp
   AddDiscriminators.cpp
   BasicBlockUtils.cpp
Index: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
===
--- /dev/null
+++ llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,246 @@
+//===- AMDGPUEmitPrintf.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===--===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include 
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static bool isCString(const Value *Arg) {
+  auto Ty = Arg->getType();
+  auto PtrTy = dyn_cast(Ty);
+  if (!PtrTy)
+return false;
+
+  auto IntTy = dyn_cast(PtrTy->getElementType());
+  if (!IntTy)
+return false;
+
+  return IntTy->getBitWidth() == 8;
+}
+
+static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Ty = Arg->getType();
+
+  if (auto IntTy = dyn_cast(Ty)) {
+switch (IntTy->getBitWidth()) {
+case 32:
+  return Builder.CreateZExt(Arg, Int64Ty);
+case 64:
+  return Arg;
+}
+  }
+
+  if (Ty->getTypeID() == Type::DoubleTyID) {
+return Builder.CreateBitCast(Arg, Int64Ty);
+  }
+
+  if (auto PtrTy = dyn_cast(Ty)) {
+return Builder.CreatePtrToInt(Arg, Int64Ty);
+  }
+
+  llvm_unreachable("unexpected type");
+}
+
+static Value *callPrintfBegin(IRBuilder<> , Value *Version) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+  return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> , Value *Desc, int NumArgs,
+ Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, Value *Arg6,
+ bool IsLast) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Int32Ty = Builder.getInt32Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+   Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+   Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+  auto IsLastValue = Builder.getInt32(IsLast);
+  auto NumArgsValue = Builder.getInt32(NumArgs);
+  return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+ Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> , Value *Desc, Value *Arg,
+bool IsLast) {
+  auto Arg0 = fitArgInto64Bits(Builder, Arg);
+  auto Zero = Builder.getInt64(0);
+  return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+Zero, IsLast);
+}
+
+// The device library does not provide strlen, so we build our own loop
+// here. While we are at it, we also include the terminating null in the length.
+static Value *getStrlenWithNull(IRBuilder<> , Value *Str) {
+  auto *Prev = Builder.GetInsertBlock();
+  Module *M = Prev->getModule();
+
+  

[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-13 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added a comment.

Mostly looks fine, except vectors are supposed to work




Comment at: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp:52
+  } else if (Ty->getTypeID() == Type::DoubleTyID) {
+return Builder.CreateBitCast(Arg, Int64Ty);
+  } else if (auto PtrTy = dyn_cast(Ty)) {

No else after return



Comment at: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp:57-58
+
+  llvm_unreachable("unexpected type");
+  return Builder.getInt64(0);
+}

Dead code


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-13 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/test/CodeGenHIP/printf.cpp:18
+}
+
+// CHECK: [[BEGIN:%.*]]   = call i64 @__ockl_printf_begin(i64 0)

sameerds wrote:
> sameerds wrote:
> > arsenm wrote:
> > > This could use a lot more testcases. Can you add some half, float, and 
> > > double as well as pointers (including different address spaces) and 
> > > vectors?
> > I am not sure what exactly should be tested. The validity of this expansion 
> > depends on the signature of the builtin printf function. Since printf is 
> > variadic, the "default argument promotions" in the C/C++ spec guarantee 
> > that the arguments are 32/64 bit integers or doubles if they are not 
> > pointers. The printf signature as well as the device library functions are 
> > defined using only generic pointers, so the address space does not matter. 
> > Non-scalar arguments are not supported, which is checked by another test 
> > using a struct. I could add a vector there, but that doesn't seem to be 
> > adding any value.
> Bump!
The address space shouldn't matter, but it's good to make sure it doesn't. 

Vector arguments are 100% supported in OpenCL printf, and are not subject to 
argument promotion. You have to use a width modifier for them though.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-09 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
sameerds added inline comments.



Comment at: clang/test/CodeGenHIP/printf.cpp:18
+}
+
+// CHECK: [[BEGIN:%.*]]   = call i64 @__ockl_printf_begin(i64 0)

sameerds wrote:
> arsenm wrote:
> > This could use a lot more testcases. Can you add some half, float, and 
> > double as well as pointers (including different address spaces) and vectors?
> I am not sure what exactly should be tested. The validity of this expansion 
> depends on the signature of the builtin printf function. Since printf is 
> variadic, the "default argument promotions" in the C/C++ spec guarantee that 
> the arguments are 32/64 bit integers or doubles if they are not pointers. The 
> printf signature as well as the device library functions are defined using 
> only generic pointers, so the address space does not matter. Non-scalar 
> arguments are not supported, which is checked by another test using a struct. 
> I could add a vector there, but that doesn't seem to be adding any value.
Bump!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-07 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
sameerds added a comment.

In D71365#1807872 , @b-sumner wrote:

> Should this be looking forward to also handling OpenCL, which does require 
> vector support?


Sure. The implementation is general enough that we can point at the two places 
in AMDGPUEmitPrintf.cpp that need to be enhanced for OpenCL:

1. processArg() should recognize a vector argument and transmit its elements to 
the host correctly.
2. locateCStrings() should deal with all the additional OpenCL specifiers (like 
vectors and half data-types) as needed.

Then depending on the input language, Clang codegen can indicate whether to 
support these extensions or emit an error.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-07 Thread Brian Sumner via Phabricator via cfe-commits
b-sumner added a comment.

Should this be looking forward to also handling OpenCL, which does require 
vector support?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-06 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
sameerds added inline comments.



Comment at: clang/test/CodeGenHIP/printf.cpp:18
+}
+
+// CHECK: [[BEGIN:%.*]]   = call i64 @__ockl_printf_begin(i64 0)

arsenm wrote:
> This could use a lot more testcases. Can you add some half, float, and double 
> as well as pointers (including different address spaces) and vectors?
I am not sure what exactly should be tested. The validity of this expansion 
depends on the signature of the builtin printf function. Since printf is 
variadic, the "default argument promotions" in the C/C++ spec guarantee that 
the arguments are 32/64 bit integers or doubles if they are not pointers. The 
printf signature as well as the device library functions are defined using only 
generic pointers, so the address space does not matter. Non-scalar arguments 
are not supported, which is checked by another test using a struct. I could add 
a vector there, but that doesn't seem to be adding any value.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-06 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/test/CodeGenHIP/printf.cpp:18
+}
+
+// CHECK: [[BEGIN:%.*]]   = call i64 @__ockl_printf_begin(i64 0)

This could use a lot more testcases. Can you add some half, float, and double 
as well as pointers (including different address spaces) and vectors?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2020-01-06 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
sameerds updated this revision to Diff 236312.
sameerds edited the summary of this revision.
sameerds added a comment.

Improved the test defined in clang/test/CodeGenHIP/printf.cpp


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71365/new/

https://reviews.llvm.org/D71365

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGGPUBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/CodeGenHIP/printf-aggregate.cpp
  clang/test/CodeGenHIP/printf.cpp
  clang/test/Driver/hip-printf.hip
  llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
  llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
  llvm/lib/Transforms/Utils/CMakeLists.txt

Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMTransformUtils
+  AMDGPUEmitPrintf.cpp
   ASanStackFrameLayout.cpp
   AddDiscriminators.cpp
   BasicBlockUtils.cpp
Index: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
===
--- /dev/null
+++ llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,243 @@
+//===- AMDGPUEmitPrintf.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===--===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include 
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static bool isCString(const Value *Arg) {
+  auto Ty = Arg->getType();
+  auto PtrTy = dyn_cast(Ty);
+  if (!PtrTy)
+return false;
+
+  auto IntTy = dyn_cast(PtrTy->getElementType());
+  if (!IntTy)
+return false;
+
+  return IntTy->getBitWidth() == 8;
+}
+
+static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Ty = Arg->getType();
+  if (auto IntTy = dyn_cast(Ty)) {
+switch (IntTy->getBitWidth()) {
+case 32:
+  return Builder.CreateZExt(Arg, Int64Ty);
+case 64:
+  return Arg;
+}
+  } else if (Ty->getTypeID() == Type::DoubleTyID) {
+return Builder.CreateBitCast(Arg, Int64Ty);
+  } else if (auto PtrTy = dyn_cast(Ty)) {
+return Builder.CreatePtrToInt(Arg, Int64Ty);
+  }
+
+  llvm_unreachable("unexpected type");
+  return Builder.getInt64(0);
+}
+
+static Value *callPrintfBegin(IRBuilder<> , Value *Version) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+  return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> , Value *Desc, int NumArgs,
+ Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, Value *Arg6,
+ bool IsLast) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Int32Ty = Builder.getInt32Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+   Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+   Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+  auto IsLastValue = Builder.getInt32(IsLast);
+  auto NumArgsValue = Builder.getInt32(NumArgs);
+  return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+ Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> , Value *Desc, Value *Arg,
+bool IsLast) {
+  auto Arg0 = fitArgInto64Bits(Builder, Arg);
+  auto Zero = Builder.getInt64(0);
+  return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+Zero, IsLast);
+}
+
+// The device library does not provide strlen, so we build our own loop
+// here. While we are at it, we also include the terminating null in the length.
+static Value *getStrlenWithNull(IRBuilder<> , Value *Str) {
+  auto *Prev = Builder.GetInsertBlock();
+  Module *M = Prev->getModule();
+
+  auto CharZero = Builder.getInt8(0);
+  auto One = Builder.getInt64(1);
+  auto Zero = 

[PATCH] D71365: expand printf when compiling HIP to AMDGPU

2019-12-11 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits
sameerds created this revision.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, t-tye, tpr, 
dstuttard, yaxunl, mgorny, nhaehnle, wdng, jvesely, kzhuravl.
Herald added projects: clang, LLVM.

This change implements the expansion in two parts:

- Add a utility function emitAMDGPUPrintfCall() in LLVM.
- Invoke the above function from Clang CodeGen, when processing a HIP program 
for the AMDGPU target.

The printf expansion has undefined behaviour if the format string is
not a compile-time constant. As a sufficient condition, the HIP
ToolChain now emits -Werror=format-nonliteral.

clang format


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71365

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGGPUBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/CodeGenHIP/printf-aggregate.cpp
  clang/test/CodeGenHIP/printf.cpp
  clang/test/Driver/hip-printf.hip
  llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
  llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
  llvm/lib/Transforms/Utils/CMakeLists.txt

Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMTransformUtils
+  AMDGPUEmitPrintf.cpp
   ASanStackFrameLayout.cpp
   AddDiscriminators.cpp
   BasicBlockUtils.cpp
Index: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
===
--- /dev/null
+++ llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,243 @@
+//===- AMDGPUEmitPrintf.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===--===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include 
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static bool isCString(const Value *Arg) {
+  auto Ty = Arg->getType();
+  auto PtrTy = dyn_cast(Ty);
+  if (!PtrTy)
+return false;
+
+  auto IntTy = dyn_cast(PtrTy->getElementType());
+  if (!IntTy)
+return false;
+
+  return IntTy->getBitWidth() == 8;
+}
+
+static Value *fitArgInto64Bits(IRBuilder<> , Value *Arg) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Ty = Arg->getType();
+  if (auto IntTy = dyn_cast(Ty)) {
+switch (IntTy->getBitWidth()) {
+case 32:
+  return Builder.CreateZExt(Arg, Int64Ty);
+case 64:
+  return Arg;
+}
+  } else if (Ty->getTypeID() == Type::DoubleTyID) {
+return Builder.CreateBitCast(Arg, Int64Ty);
+  } else if (auto PtrTy = dyn_cast(Ty)) {
+return Builder.CreatePtrToInt(Arg, Int64Ty);
+  }
+
+  llvm_unreachable("unexpected type");
+  return Builder.getInt64(0);
+}
+
+static Value *callPrintfBegin(IRBuilder<> , Value *Version) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+  return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> , Value *Desc, int NumArgs,
+ Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, Value *Arg6,
+ bool IsLast) {
+  auto Int64Ty = Builder.getInt64Ty();
+  auto Int32Ty = Builder.getInt32Ty();
+  auto M = Builder.GetInsertBlock()->getModule();
+  auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+   Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+   Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+  auto IsLastValue = Builder.getInt32(IsLast);
+  auto NumArgsValue = Builder.getInt32(NumArgs);
+  return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+ Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> , Value *Desc, Value *Arg,
+bool IsLast) {
+  auto Arg0 = fitArgInto64Bits(Builder, Arg);
+  auto Zero = Builder.getInt64(0);
+  return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+Zero, IsLast);
+}
+
+// The device library does