[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-04-17 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked 2 inline comments as done.
yaxunl added inline comments.



Comment at: lib/Frontend/CompilerInvocation.cpp:2109
+  Opts.HIP = true;
+  }
+

rjmccall wrote:
> yaxunl wrote:
> > rjmccall wrote:
> > > Why is this done here?  We infer the language mode from the input kind 
> > > somewhere else.
> > It is usually done through CompilerInvocation::setLangDefaults. However, 
> > HIP does not have its own input kind nor is it defined as a language 
> > standard. Therefore it cannot use CompilerInvocation::setLangDefaults to 
> > set Opts.HIP. 
> What are the values of -x if not input kinds or language standards?
I will add hip as input kind and language standard since it really is both.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-04-14 Thread John McCall via Phabricator via cfe-commits
rjmccall added inline comments.



Comment at: lib/Frontend/CompilerInvocation.cpp:2109
+  Opts.HIP = true;
+  }
+

yaxunl wrote:
> rjmccall wrote:
> > Why is this done here?  We infer the language mode from the input kind 
> > somewhere else.
> It is usually done through CompilerInvocation::setLangDefaults. However, HIP 
> does not have its own input kind nor is it defined as a language standard. 
> Therefore it cannot use CompilerInvocation::setLangDefaults to set Opts.HIP. 
What are the values of -x if not input kinds or language standards?


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-04-13 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 142422.
yaxunl marked an inline comment as done.
yaxunl added a comment.

Revised by John's comments.


https://reviews.llvm.org/D44984

Files:
  include/clang/Basic/LangOptions.def
  lib/CodeGen/CGCUDANV.cpp
  lib/CodeGen/CGDecl.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Frontend/InitPreprocessor.cpp
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaDecl.cpp
  test/CodeGenCUDA/device-stub.cu

Index: test/CodeGenCUDA/device-stub.cu
===
--- test/CodeGenCUDA/device-stub.cu
+++ test/CodeGenCUDA/device-stub.cu
@@ -1,8 +1,11 @@
 // RUN: echo "GPU binary would be here" > %t
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:   -fcuda-include-gpubinary %t -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:   -fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
 // RUN:   | FileCheck %s -check-prefix=NOGLOBALS
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
+// RUN:   | FileCheck %s -check-prefix=NOGPUBIN
 
 #include "Inputs/cuda.h"
 
@@ -77,10 +80,14 @@
 // Test that we've built a function to register kernels and global vars.
 // CHECK: define internal void @__cuda_register_globals
 // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}
+// CHECK-DAG-SAME:  i32 0, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}
+// CHECK-DAG-SAME:  i32 0, i32 4, i32 1, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}
+// CHECK-DAG-SAME:  i32 1, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}
+// CHECK-DAG-SAME:  i32 1, i32 4, i32 1, i32 0
 // CHECK: ret void
 
 // Test that we've built contructor..
Index: lib/Sema/SemaDecl.cpp
===
--- lib/Sema/SemaDecl.cpp
+++ lib/Sema/SemaDecl.cpp
@@ -9048,11 +9048,13 @@
 
   if (getLangOpts().CUDA) {
 IdentifierInfo *II = NewFD->getIdentifier();
-if (II && II->isStr("cudaConfigureCall") && !NewFD->isInvalidDecl() &&
+if (II &&
+((getLangOpts().HIP && II->isStr("hipConfigureCall")) ||
+ (!getLangOpts().HIP && II->isStr("cudaConfigureCall"))) &&
+!NewFD->isInvalidDecl() &&
 NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
   if (!R->getAs()->getReturnType()->isScalarType())
 Diag(NewFD->getLocation(), diag::err_config_scalar_return);
-
   Context.setcudaConfigureCallDecl(NewFD);
 }
 
Index: lib/Sema/SemaCUDA.cpp
===
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -42,8 +42,9 @@
  SourceLocation GGGLoc) {
   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
   if (!ConfigDecl)
-return ExprError(Diag(oc, diag::err_undeclared_var_use)
- << "cudaConfigureCall");
+return ExprError(
+Diag(oc, diag::err_undeclared_var_use)
+<< (getLangOpts().HIP ? "hipConfigureCall" : "cudaConfigureCall"));
   QualType ConfigQTy = ConfigDecl->getType();
 
   DeclRefExpr *ConfigDR = new (Context)
Index: lib/Frontend/InitPreprocessor.cpp
===
--- lib/Frontend/InitPreprocessor.cpp
+++ lib/Frontend/InitPreprocessor.cpp
@@ -465,6 +465,8 @@
 Builder.defineMacro("__ASSEMBLER__");
   if (LangOpts.CUDA)
 Builder.defineMacro("__CUDA__");
+  if (LangOpts.HIP)
+Builder.defineMacro("__HIP__");
 }
 
 /// Initialize the predefined C++ language feature test macros defined in
Index: lib/Frontend/CompilerInvocation.cpp
===
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1564,6 +1564,7 @@
 .Case("c", InputKind::C)
 .Case("cl", InputKind::OpenCL)
 .Case("cuda", InputKind::CUDA)
+.Case("hip", InputKind::CUDA)
 .Case("c++", InputKind::CXX)
 

[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-04-13 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked 4 inline comments as done.
yaxunl added inline comments.



Comment at: lib/Frontend/CompilerInvocation.cpp:2109
+  Opts.HIP = true;
+  }
+

rjmccall wrote:
> Why is this done here?  We infer the language mode from the input kind 
> somewhere else.
It is usually done through CompilerInvocation::setLangDefaults. However, HIP 
does not have its own input kind nor is it defined as a language standard. 
Therefore it cannot use CompilerInvocation::setLangDefaults to set Opts.HIP. 


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-04-13 Thread John McCall via Phabricator via cfe-commits
rjmccall added inline comments.



Comment at: lib/CodeGen/CGCUDANV.cpp:98
+std::string CGNVCUDARuntime::addPrefixToName(CodeGenModule &CGM,
+ std::string FuncName) const {
+  if (CGM.getLangOpts().HIP)

Can you take these as StringRefs or Twines?





Comment at: lib/CodeGen/CGCUDANV.cpp:104
+std::string CGNVCUDARuntime::addPrefixToNameBar(CodeGenModule &CGM,
+std::string FuncName) const {
+  if (CGM.getLangOpts().HIP)

I think "addUnderscoredPrefixToName" would be better.



Comment at: lib/CodeGen/CGCUDANV.cpp:134
 llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
   // cudaError_t cudaLaunch(char *)
+  if (CGM.getLangOpts().HIP)

Please move this comment down into the else clause (and terminate it with a 
semicolon) and add your own declaration comment in your clause.



Comment at: lib/Frontend/CompilerInvocation.cpp:2109
+  Opts.HIP = true;
+  }
+

Why is this done here?  We infer the language mode from the input kind 
somewhere else.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-04-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

Ping. Do any further changes need to be made for this patch? Thanks.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-03-29 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked an inline comment as done.
yaxunl added a comment.

> 
> 
>> Since the header file and library seem not to affect this patch, is it OK to 
>> defer their changes to be part of the toolchain patch?
> 
> I'm not sure I understand. Could you elaborate?

clang -cc1 does not include `__clang_cuda_runtime_wrapper.h` by default when 
clang -cc1 is called directly to compile CUDA programs. The CUDA toolchain adds 
-include `__clang_cuda_runtime_wrapper.h` when compiling a CUDA program as kernel 
code. Therefore, if clang -cc1 is used to compile a HIP program in a lit test, 
there is no need to use `-nocudainc`.

This patch mainly changes kernel launching API function names. The 
implementation and testing of this change do not depend on the CUDA/HIP header 
files. A minimal header like test/CodeGenCUDA/Inputs/cuda.h is sufficient for 
testing this patch.

Basically, this patch only concerns -cc1 and is therefore independent 
of the toolchain changes about header and library files.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-03-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D44984#1050672, @rjmccall wrote:

> You should send an RFC to cfe-dev about adding this new language mode.  I 
> understand that it's very similar to an existing language mode that we 
> already support, and that's definitely something we'll consider, but we shouldn't just 
> agree to add new language modes in patch review.


RFC sent http://lists.llvm.org/pipermail/cfe-dev/2018-March/057426.html

Thanks.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-03-28 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

You should send an RFC to cfe-dev about adding this new language mode.  I 
understand that it's very similar to an existing language mode that we already 
support, and that's definitely something we'll consider, but we shouldn't just agree to 
add new language modes in patch review.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-03-28 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

In https://reviews.llvm.org/D44984#1050557, @yaxunl wrote:

> Yes we already have a basic working implementation of HIP compiler due to 
> Greg's work.


That is great, but it's not necessarily true that all these changes will make 
it into clang/llvm as is. LLVM/Clang is a community effort and it helps a lot 
to get the changes in when the community understands what it is you're planning 
to do. I personally am very glad to see AMD moving towards making clang a 
viable compiler for AMD GPUs, but there's only so much I'll be able to do to 
help you with reviews if all I have is either piecemeal patches with little 
idea how they all fit together or one humongous patch I would have no time to 
dive in and really understand. Considering that compilation for GPU is a fairly 
niche market my bet is that your progress will be bottlenecked by the code 
reviews. Whatever you can do to make reviewers' jobs easier by giving more 
context will help a lot with upstreaming the patches.

> I will either update https://reviews.llvm.org/D42800 or create a new review 
> about the toolchain changes for compiling and linking HIP programs. 
> Essentially HIP has its own header files and device libraries which are taken 
> care of by the toolchain patch.

Fair enough. I'll wait for the rest of the patches. If you have multiple 
pending patches, it helps if you could arrange them as dependent patches in 
phabricator. It makes it easier to see the big picture.

> Since the header file and library seem not to affect this patch, is it OK to 
> defer their changes to be part of the toolchain patch?

I'm not sure I understand. Could you elaborate?


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-03-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D44984#1050526, @tra wrote:

> The changes appear to cover only some of the functionality needed to enable 
> HIP support. Do you have more patches in queue? Having complete picture would 
> help to make sense of the overall plan.
>  I did ask for it in https://reviews.llvm.org/D42800, but I don't think I've 
> got the answers. It would help a lot if you or @gregrodgers could write a doc 
> somewhere outlining overall plan for HIP support in clang, what are the main 
> issues that need to be dealt with, and at least a general idea on how to 
> handle them.
>
> As far as "add -x hip, and tweak runtime glue codegen" goes, the change looks 
> OK, but it's not very useful all by itself. It leaves a lot of other issues 
> unsolved and no clear plan on whether/when/how you are planning to deal with 
> them.
>
> As things stand right now, with this patch clang will still attempt to 
> include CUDA headers, which, among other things will provide 
> threadIdx/blockIdx and other CUDA-specific features.
>  Perhaps it would make sense to disable pre-inclusion of CUDA headers and, 
> probably, disable use of CUDA's libdevice bitcode library if we're compiling 
> with -x hip (i.e. -nocudainc -nocudalib).
>  If you do depend on CUDA headers, then, I suspect, you may need to adjust 
> some wrapper headers we use for CUDA and that change should probably come 
> before this one.


Hi Artem, I am responsible for upstreaming Greg's work and addressing 
reviewers' comments.

Yes we already have a basic working implementation of HIP compiler due to 
Greg's work. I will either update https://reviews.llvm.org/D42800 or create a 
new review about the toolchain changes for compiling and linking HIP programs. 
Essentially HIP has its own header files and device libraries which are taken 
care of by the toolchain patch.

Since the header file and library seem not to affect this patch, is it OK to 
defer their changes to be part of the toolchain patch?

Thanks.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-03-28 Thread Artem Belevich via Phabricator via cfe-commits
tra added a subscriber: gregrodgers.
tra added a comment.

The changes appear to cover only some of the functionality needed to enable HIP 
support. Do you have more patches in queue? Having complete picture would help 
to make sense of the overall plan.
I did ask for it in https://reviews.llvm.org/D42800, but I don't think I've got 
the answers. It would help a lot if you or @gregrodgers could write a doc 
somewhere outlining overall plan for HIP support in clang, what are the main 
issues that need to be dealt with, and at least a general idea on how to handle 
them.

As far as "add -x hip, and tweak runtime glue codegen" goes, the change looks 
OK, but it's not very useful all by itself. It leaves a lot of other issues 
unsolved and no clear plan on whether/when/how you are planning to deal with 
them.

As things stand right now, with this patch clang will still attempt to include 
CUDA headers, which, among other things will provide threadIdx/blockIdx and 
other CUDA-specific features.
Perhaps it would make sense to disable pre-inclusion of CUDA headers and, 
probably, disable use of CUDA's libdevice bitcode library if we're compiling 
with -x hip (i.e. -nocudainc -nocudalib).
If you do depend on CUDA headers, then, I suspect, you may need to adjust some 
wrapper headers we use for CUDA and that change should probably come before 
this one.




Comment at: test/CodeGenCUDA/device-stub.cu:2-9
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s 
-fcuda-include-gpubinary %t -o - | FileCheck -check-prefixes=CHECK,CUDA %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s 
-fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
 // RUN:   | FileCheck %s -check-prefix=NOGLOBALS
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s 
-check-prefix=NOGPUBIN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -o - | 
FileCheck %s -check-prefix=NOGPUBIN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s 
-fcuda-include-gpubinary %t -o - | FileCheck -check-prefixes=CHECK,HIP %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s 
-fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
+// RUN:   | FileCheck %s -check-prefix=NOGLOBALS

Please wrap the long lines.


https://reviews.llvm.org/D44984



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44984: [HIP] Add hip file type and codegen for kernel launching

2018-03-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
yaxunl added a reviewer: rjmccall.
Herald added a subscriber: tpr.

HIP is a language similar to CUDA 
(https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md
 ).
The language syntax is very similar, which allows a hip program to be compiled 
as a CUDA program by Clang. The main difference
is the host API. HIP has a set of vendor-neutral host APIs which can be 
implemented on different platforms. Currently there is an open source
implementation of the HIP runtime for the amdgpu target 
(https://github.com/ROCm-Developer-Tools/HIP).

This patch adds support of file type hip and language option HIP.

When a hip file is compiled, both LangOpts.CUDA and LangOpts.HIP are turned on. 
This allows compilation of a hip program as CUDA
in most cases; LangOpts.HIP is checked only where special handling of a hip 
program is needed.

This patch also adds support of kernel launching of HIP program using HIP host 
API.

When -x hip is not specified, there is no behaviour change for CUDA.

Patch by Greg Rodgers.
Lit test added by Yaxun Liu.


https://reviews.llvm.org/D44984

Files:
  include/clang/Basic/LangOptions.def
  lib/CodeGen/CGCUDANV.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Frontend/InitPreprocessor.cpp
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaDecl.cpp
  test/CodeGenCUDA/Inputs/cuda.h
  test/CodeGenCUDA/device-stub.cu
  test/CodeGenCUDA/kernel-call.cu

Index: test/CodeGenCUDA/kernel-call.cu
===
--- test/CodeGenCUDA/kernel-call.cu
+++ test/CodeGenCUDA/kernel-call.cu
@@ -1,11 +1,20 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CUDA,CHECK
+// RUN: %clang_cc1 -x hip -emit-llvm %s -o - | FileCheck %s --check-prefixes=HIP,CHECk
+
 
 #include "Inputs/cuda.h"
 
+// CHECK-LABEL: define void @_Z2g1i(i32 %x)
+// HIP: call{{.*}}hipSetupArgument
+// HIP: call{{.*}}hipLaunchByPtr
+// CUDA: call{{.*}}cudaSetupArgument
+// CUDA: call{{.*}}cudaLaunch
 __global__ void g1(int x) {}
 
+// CHECK-LABEL: define i32 @main
 int main(void) {
-  // CHECK: call{{.*}}cudaConfigureCall
+  // HIP: call{{.*}}hipConfigureCall
+  // CUDA: call{{.*}}cudaConfigureCall
   // CHECK: icmp
   // CHECK: br
   // CHECK: call{{.*}}g1
Index: test/CodeGenCUDA/device-stub.cu
===
--- test/CodeGenCUDA/device-stub.cu
+++ test/CodeGenCUDA/device-stub.cu
@@ -1,8 +1,12 @@
 // RUN: echo "GPU binary would be here" > %t
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck -check-prefixes=CHECK,CUDA %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
 // RUN:   | FileCheck %s -check-prefix=NOGLOBALS
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck -check-prefixes=CHECK,HIP %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
+// RUN:   | FileCheck %s -check-prefix=NOGLOBALS
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
 
 #include "Inputs/cuda.h"
 
@@ -48,67 +52,68 @@
 // CHECK: private unnamed_addr constant{{.*GPU binary would be here.*}}\00"
 // CHECK-SAME: section ".nv_fatbin", align 8
 // * constant struct that wraps GPU binary
-// CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } 
+// CUDA: @__[[PREFIX:cuda]]_fatbin_wrapper = internal constant { i32, i32, i8*, i8* }
+// HIP: @__[[PREFIX:hip]]_fatbin_wrapper = internal constant { i32, i32, i8*, i8* }
 // CHECK-SAME: { i32 1180844977, i32 1, {{.*}}, i8* null }
 // CHECK-SAME: section ".nvFatBinSegment"
 // * variable to save GPU binary handle after initialization
-// CHECK: @__cuda_gpubin_handle = internal global i8** null
+// CHECK: @__[[PREFIX]]_gpubin_handle = internal global i8** null
 // * Make sure our constructor/destructor was added to global ctor/dtor list.
-// CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
-// CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor
+// CHECK: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
+// CHECK: @llvm.global_dtors = appending global {{.*}}@__[[PREFIX]]_module_dtor
 
 // Test that we build the correct number of calls to cudaSetupArgument followed
 // by a call to cudaLaunch.
 
 // CHECK: define{{.*}}kernelfunc
-// CHECK: call{{.*}}cudaSetupArgument
-// CHECK: call{{.*}}cudaSetupArgument
-// CHECK: call{{.*}}cudaSetupArgument