[PATCH] D102107: [OpenMP] Codegen aggregate for outlined function captures

2021-12-23 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I am also seeing a lot of failures on an nvptx machine (sm_70, CUDA 11.4) with
this patch:

  libomptarget :: nvptx64-nvidia-cuda :: offloading/bug49021.cpp
  libomptarget :: nvptx64-nvidia-cuda :: offloading/bug49334.cpp
  libomptarget :: nvptx64-nvidia-cuda :: offloading/bug49779.cpp
  libomptarget :: nvptx64-nvidia-cuda :: offloading/bug51781.c
  libomptarget :: nvptx64-nvidia-cuda :: offloading/bug51982.c
  libomptarget :: nvptx64-nvidia-cuda :: 
unified_shared_memory/close_enter_exit.c
  libomptarget :: nvptx64-nvidia-cuda :: unified_shared_memory/close_modifier.c
  libomptarget :: nvptx64-nvidia-cuda :: unified_shared_memory/shared_update.c
  libomptarget :: nvptx64-nvidia-cuda-newRTL :: offloading/bug49021.cpp
  libomptarget :: nvptx64-nvidia-cuda-newRTL :: offloading/bug49334.cpp
  libomptarget :: nvptx64-nvidia-cuda-newRTL :: offloading/bug51781.c
  libomptarget :: nvptx64-nvidia-cuda-newRTL :: 
unified_shared_memory/close_enter_exit.c
  libomptarget :: nvptx64-nvidia-cuda-newRTL :: 
unified_shared_memory/close_modifier.c
  libomptarget :: nvptx64-nvidia-cuda-newRTL :: 
unified_shared_memory/shared_update.c

On amdgcn, these are the tests failing,

  libomptarget :: amdgcn-amd-amdhsa :: offloading/bug49021.cpp
  libomptarget :: amdgcn-amd-amdhsa :: offloading/bug51781.c
  libomptarget :: amdgcn-amd-amdhsa :: offloading/bug51982.c
  libomptarget :: amdgcn-amd-amdhsa-newRTL :: offloading/bug49021.cpp
  libomptarget :: amdgcn-amd-amdhsa-newRTL :: offloading/bug51781.c


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102107/new/

https://reviews.llvm.org/D102107

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D114865: [AMDGPU][OpenMP] Use -amdgpu-fixed-function-abi

2021-12-01 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: arsenm, JonChesterfield.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: openmp-commits, cfe-commits, sstefan1, wdng.
Herald added a reviewer: jdoerfert.
Herald added projects: clang, OpenMP.

This fixes the issue of arguments being clobbered when using indirect
calls.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D114865

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/test/Driver/amdgpu-openmp-toolchain.c
  openmp/libomptarget/test/offloading/bug49021.cpp


Index: openmp/libomptarget/test/offloading/bug49021.cpp
===
--- openmp/libomptarget/test/offloading/bug49021.cpp
+++ openmp/libomptarget/test/offloading/bug49021.cpp
@@ -1,8 +1,4 @@
 // RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
-
-// Wrong results on amdgpu
-// XFAIL: amdgcn-amd-amdhsa
-
 #include 
 
 template  int test_map() {
Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -8,7 +8,7 @@
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"ir"{{.*}}
 // CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906" 
"-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
-// CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
+// CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-amdgpu-fixed-function-abi" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: clang-offload-wrapper{{.*}}"-target" "x86_64-unknown-linux-gnu" "-o" 
"{{.*}}a-{{.*}}.bc" {{.*}}amdgpu-openmp-toolchain-{{.*}}.out"
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-o" 
"{{.*}}a-{{.*}}.o" "-x" "ir" "{{.*}}a-{{.*}}.bc"
@@ -58,8 +58,8 @@
 // verify the llc is invoked for textual assembly output
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx906 --libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib -save-temps 
%s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-SAVE-ASM
-// CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=asm" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.s"
-// CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.o"
+// CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=asm" 
"-amdgpu-fixed-function-abi" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.s"
+// CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-amdgpu-fixed-function-abi" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.o"
 
 // check the handling of -c
 // RUN:   %clang -ccc-print-bindings -c --target=x86_64-unknown-linux-gnu 
-fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx906 --libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib -save-temps 
%s 2>&1 \
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- clang/test/Driver/amdgpu-openmp-system-arch.c
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -14,11 +14,11 @@
 // RUN:   | FileCheck %s
 // CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
 // CHECK: llvm-link{{.*}}"-o" 
"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
-// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
+// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" 
"-amdgpu-fixed-function-abi" 
"-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
 
 // case when 

[PATCH] D111218: [AMDGPU][OpenMP] Mark oulined functions always_inline

2021-10-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 377502.
pdhaliwal added a comment.

Only removing optnone.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111218/new/

https://reviews.llvm.org/D111218

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp


Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -503,6 +503,11 @@
 F->setDoesNotThrow();
   F->setDoesNotRecurse();
 
+  // TODO: should not need this once amdgcn handles function pointers properly.
+  if (CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
+  }
+
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
   Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
@@ -665,8 +670,14 @@
   F->setDoesNotRecurse();
 
   // Always inline the outlined function if optimizations are enabled.
-  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
 F->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+
+  // TODO: remove this once amdgcn handles function pointers properly.
+  if (CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
+  }
 
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,


Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -503,6 +503,11 @@
 F->setDoesNotThrow();
   F->setDoesNotRecurse();
 
+  // TODO: should not need this once amdgcn handles function pointers properly.
+  if (CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
+  }
+
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
   Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
@@ -665,8 +670,14 @@
   F->setDoesNotRecurse();
 
   // Always inline the outlined function if optimizations are enabled.
-  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
 F->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+
+  // TODO: remove this once amdgcn handles function pointers properly.
+  if (CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
+  }
 
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D111218: [AMDGPU][OpenMP] Mark oulined functions always_inline

2021-10-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal planned changes to this revision.
pdhaliwal added a comment.

I don't have any concrete evidence, but I suspect that the presence of
function pointers is causing the backend to behave improperly. Also, removing
optnone alone suffices to fix the issue here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111218/new/

https://reviews.llvm.org/D111218

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102107: [OpenMP] Codegen aggregate for outlined function captures

2021-10-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I have created a patch (D111218) with a fix
for amdgcn. This is a temporary fix. I will keep looking into it until
I find the real root cause.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102107/new/

https://reviews.llvm.org/D102107

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D111218: [AMDGPU][OpenMP] Mark oulined functions always_inline

2021-10-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: JonChesterfield, jdoerfert, jhuber6, ggeorgakoudis.
Herald added subscribers: guansong, t-tye, tpr, dstuttard, yaxunl, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

This depends on D102107 and unblocks the
failing amdgcn runtime
tests in the latter.

From what I understand, amd-stg-open is working because
everything is marked inline in an internal pass which the main branch
currently does not have. Marking the outlined functions as
always_inline does fix the issue; however, proper fixes to the
backend are still required. Until then, this will work. I have also
added a TODO on top of the added code.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D111218

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp


Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -503,6 +503,13 @@
 F->setDoesNotThrow();
   F->setDoesNotRecurse();
 
+  // TODO: should not need this once amdgcn handles function calls properly.
+  if (CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
+F->removeFnAttr(llvm::Attribute::NoInline);
+F->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
   Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
@@ -664,9 +671,14 @@
 F->setDoesNotThrow();
   F->setDoesNotRecurse();
 
-  // Always inline the outlined function if optimizations are enabled.
-  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+  // Always inline the outlined function if optimizations are enabled or 
current
+  // target is amdgcn.
+  // TODO: amdgcn check should be removed once it handles function calls 
properly.
+  if (CGM.getCodeGenOpts().OptimizationLevel != 0 || 
CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::NoInline);
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
 F->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
 
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,


Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -503,6 +503,13 @@
 F->setDoesNotThrow();
   F->setDoesNotRecurse();
 
+  // TODO: should not need this once amdgcn handles function calls properly.
+  if (CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
+F->removeFnAttr(llvm::Attribute::NoInline);
+F->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
   Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
@@ -664,9 +671,14 @@
 F->setDoesNotThrow();
   F->setDoesNotRecurse();
 
-  // Always inline the outlined function if optimizations are enabled.
-  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+  // Always inline the outlined function if optimizations are enabled or current
+  // target is amdgcn.
+  // TODO: amdgcn check should be removed once it handles function calls properly.
+  if (CGM.getCodeGenOpts().OptimizationLevel != 0 || CGM.getTriple().isAMDGCN()) {
+F->removeFnAttr(llvm::Attribute::NoInline);
+F->removeFnAttr(llvm::Attribute::OptimizeNone);
 F->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
 
   // Generate the function.
   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102107: [OpenMP] Codegen aggregate for outlined function captures

2021-09-30 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I modified declare_mapper_target to print the contents of the array after
the target region and got the following output:

  2 3 4 5 6 7 8 9 10 11 Sum = 65

Program:

  #include 
  #include 
  
  #define NUM 10
  
  int main() {
int *c= new int[NUM];
for (int i = 0; i < NUM; i++) {
  c[i] = 1;
}
  #pragma omp target teams distribute  parallel for map(tofrom: c[0:NUM])
for (int i = 0; i < NUM; i++) {
  c[i]++;
}
int sum = 0;
for (int i = 0; i < NUM; i++) {
  sum += c[i];
  printf("%d ", c[i]);
}
// CHECK: Sum = 2048
printf("Sum = %d\n", sum);
return 0;
  }

A different variant of the same program produces the correct output:

  #include 
  #include 
  
  #define NUM 10
  
  int main() {
int *c= new int[NUM];
for (int i = 0; i < NUM; i++) {
  c[i] = 1;
}
  
int *b = new int[NUM];
  #pragma omp target teams distribute  parallel for map(tofrom: c[0:NUM], 
b[0:NUM])
for (int i = 0; i < NUM; i++) {
  b[i] = c[i] + 1;
}
int sum = 0;
for (int i = 0; i < NUM; i++) {
  sum += b[i];
  printf("%d ", b[i]);
}
// CHECK: Sum = 2048
printf("Sum = %d\n", sum);
return 0;
  }

Output (this is the right answer):

  2 2 2 2 2 2 2 2 2 2 Sum = 20

On the internal amd-stg-open branch, this patch works fine, so the issue is
only with trunk.
I compared the generated IR before and after applying this patch and didn't see
anything suspicious (but I can't be 100% sure).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102107/new/

https://reviews.llvm.org/D102107

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102107: [OpenMP] Codegen aggregate for outlined function captures

2021-09-29 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Apologies for the late reply. Most of the tests no longer try to call malloc, so
there are no page fault errors. But all of them are producing wrong results. For
example, declare_mapper_target.cpp produces Sum = 132608 with the patch applied,
and similarly for the other tests. So I don't know what's happening yet.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102107/new/

https://reviews.llvm.org/D102107

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105191: [Clang][OpenMP] Add support for Static Device Libraries

2021-09-22 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/test/Driver/fat_archive.cpp:9
+// could be resolved correctly.
+// RUN: env 
LIBRARY_PATH=%T/../../../../../runtimes/runtimes-bins/openmp/libomptarget 
%clang -O2 -target x86_64-pc-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx906 %s -L%S/Inputs/hip_dev_lib -lFatArchive -o - | FileCheck %s 
-check-prefix=LINKERROR
+// LINKERROR-NOT: error: linker command failed with exit code 1

Here, LIBRARY_PATH specifies the path to the build directory of the openmp
runtime, which might not be available when openmp is not built. Why not use the
%S/Inputs directory and put the required files there?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105191/new/

https://reviews.llvm.org/D105191

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102107: [OpenMP] Codegen aggregate for outlined function captures

2021-09-22 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I got this after changing __kmpc_impl_malloc to return 0xdeadbeef. So, this
confirms that the missing malloc implementation is the root cause.

> Memory access fault by GPU node-4 (Agent handle: 0x1bc5000) on address 
> 0xdeadb000. Reason: Page not present or supervisor privilege.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102107/new/

https://reviews.llvm.org/D102107

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102107: [OpenMP] Codegen aggregate for outlined function captures

2021-09-22 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

From the IR diff, it looks like this patch adds uses of the kmpc_alloc_shared
method. These methods likely won't work on AMDGPU as device malloc is not
available. Not sure what could be done apart from marking those tests as XFAIL
on amdgcn. :(


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102107/new/

https://reviews.llvm.org/D102107

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109344: [AMDGPU][OpenMP] Use complex definitions from complex_cmath.h

2021-09-14 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Even with the declare variants separated using #ifdefs, the error is still
there. So I don't think we have a workaround for this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109344/new/

https://reviews.llvm.org/D109344

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109344: [AMDGPU][OpenMP] Use complex definitions from complex_cmath.h

2021-09-13 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Hey Jon, sorry for the late reply. I cannot reproduce this issue on nvptx, so it
seems to occur only on amdgcn. Would it be better to fix the name mangling
issue instead? Or, in the meantime, I could add an #ifdef around it as a
temporary fix. Suggestions?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109344/new/

https://reviews.llvm.org/D109344

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109344: [AMDGPU][OpenMP] Use complex definitions from complex_cmath.h

2021-09-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 371008.
pdhaliwal added a comment.

Add amdgcn to the same arch list as nvptx


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109344/new/

https://reviews.llvm.org/D109344

Files:
  clang/lib/Headers/openmp_wrappers/complex
  clang/test/Headers/amdgcn-openmp-device-math-complex.cpp

Index: clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
===
--- /dev/null
+++ clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -aux-triple x86_64-unknown-unknown -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include 
+#include 
+
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isinf_f64(
+
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isinf_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isinf_f64(
+// CHECK-DAG: call i32 @__ocml_isfinite_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+// CHECK-DAG: call double @__ocml_scalbn_f64(
+// CHECK-DAG: call double @__ocml_fabs_f64(
+// CHECK-DAG: call double @__ocml_logb_f64(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isinf_f32(
+// CHECK-DAG: call i32 @__ocml_isfinite_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call float @__ocml_scalbn_f32(
+// CHECK-DAG: call float @__ocml_fabs_f32(
+// CHECK-DAG: call float @__ocml_logb_f32(
+
+// We actually check that there are no declarations of non-OpenMP functions.
+// That is, as long as we don't call an unkown function with a name that
+// doesn't start with '__' we are good :)
+
+// CHECK-NOT: declare.*@[^_]
+
+void test_scmplx(std::complex a) {
+#pragma omp target
+  {
+(void)(a * (a / a));
+  }
+}
+
+void test_dcmplx(std::complex a) {
+#pragma omp target
+  {
+(void)(a * (a / a));
+  }
+}
+
+template 
+std::complex test_template_math_calls(std::complex a) {
+  decltype(a) r = a;
+#pragma omp target
+  {
+r = std::sin(r);
+r = std::cos(r);
+r = std::exp(r);
+r = std::atan(r);
+r = std::acos(r);
+  }
+  return r;
+}
+
+std::complex test_scall(std::complex a) {
+  decltype(a) r;
+#pragma omp target
+  {
+r = std::sin(a);
+  }
+  return test_template_math_calls(r);
+}
+
+std::complex test_dcall(std::complex a) {
+  decltype(a) r;
+#pragma omp target
+  {
+r = std::exp(a);
+  }
+  return test_template_math_calls(r);
+}
Index: clang/lib/Headers/openmp_wrappers/complex
===
--- clang/lib/Headers/openmp_wrappers/complex
+++ clang/lib/Headers/openmp_wrappers/complex
@@ -45,7 +45,7 @@
 #ifndef _LIBCPP_STD_VER
 
 #pragma omp begin declare variant match(   \
-device = {arch(nvptx, nvptx64)},   \
+device = {arch(amdgcn, nvptx, nvptx64)},   \
 implementation = {extension(match_any, allow_templates)})
 
 #include 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109344: [AMDGPU][OpenMP] Use complex definitions from complex_cmath.h

2021-09-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: ye-luo, jdoerfert, ronlieb, JonChesterfield.
Herald added subscribers: guansong, t-tye, tpr, dstuttard, yaxunl, jvesely, 
kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

Following nvptx approach, this patch uses complex function
definitions from complex_cmath.h. With this patch, ovo passes
23/34 complex mathematical test cases.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D109344

Files:
  clang/lib/Headers/openmp_wrappers/complex
  clang/test/Headers/amdgcn-openmp-device-math-complex.cpp

Index: clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
===
--- /dev/null
+++ clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -aux-triple x86_64-unknown-unknown -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include 
+#include 
+
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isinf_f64(
+
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isinf_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isinf_f64(
+// CHECK-DAG: call i32 @__ocml_isfinite_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+// CHECK-DAG: call double @__ocml_scalbn_f64(
+// CHECK-DAG: call double @__ocml_fabs_f64(
+// CHECK-DAG: call double @__ocml_logb_f64(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isinf_f32(
+// CHECK-DAG: call i32 @__ocml_isfinite_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call float @__ocml_scalbn_f32(
+// CHECK-DAG: call float @__ocml_fabs_f32(
+// CHECK-DAG: call float @__ocml_logb_f32(
+
+// We actually check that there are no declarations of non-OpenMP functions.
+// That is, as long as we don't call an unkown function with a name that
+// doesn't start with '__' we are good :)
+
+// CHECK-NOT: declare.*@[^_]
+
+void test_scmplx(std::complex a) {
+#pragma omp target
+  {
+(void)(a * (a / a));
+  }
+}
+
+void test_dcmplx(std::complex a) {
+#pragma omp target
+  {
+(void)(a * (a / a));
+  }
+}
+
+template 
+std::complex test_template_math_calls(std::complex a) {
+  decltype(a) r = a;
+#pragma omp target
+  {
+r = std::sin(r);
+r = std::cos(r);
+r = std::exp(r);
+r = std::atan(r);
+r = std::acos(r);
+  }
+  return r;
+}
+
+std::complex test_scall(std::complex a) {
+  decltype(a) r;
+#pragma omp target
+  {
+r = std::sin(a);
+  }
+  return test_template_math_calls(r);
+}
+
+std::complex test_dcall(std::complex a) {
+  decltype(a) r;
+#pragma omp target
+  {
+r = std::exp(a);
+  }
+  return test_template_math_calls(r);
+}
Index: clang/lib/Headers/openmp_wrappers/complex
===
--- clang/lib/Headers/openmp_wrappers/complex
+++ clang/lib/Headers/openmp_wrappers/complex
@@ -44,9 +44,17 @@
 // handle.
 #ifndef _LIBCPP_STD_VER
 
+#ifdef __NVPTX__
 #pragma omp begin declare variant match(   \
 device = {arch(nvptx, nvptx64)},   \
 implementation = {extension(match_any, allow_templates)})
+#endif
+
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(   \
+device = {arch(amdgcn)},   \
+implementation = {extension(match_any, allow_templates)})
+#endif
 
 #include 
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108774: [OpenMP][FIX] Allow declare variant to work with reference types

2021-08-27 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal accepted this revision.
pdhaliwal added a comment.

Confirmed locally that this fixes the linking issue. It has fixed the linking 
issue on amdgcn as well. Thanks for working on this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108774/new/

https://reviews.llvm.org/D108774

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108552: [OpenMP][AMDGCN] Enable complex functions

2021-08-23 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: JonChesterfield, ronlieb, ye-luo, jdoerfert, yaxunl, 
scchan, b-sumner.
Herald added subscribers: guansong, jvesely.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

This patch enables basic complex functionality using the ocml builtins.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D108552

Files:
  clang/lib/Headers/__clang_cuda_complex_builtins.h
  clang/lib/Headers/openmp_wrappers/complex
  clang/lib/Headers/openmp_wrappers/complex.h
  clang/test/Headers/amdgcn-openmp-device-math-complex.c

Index: clang/test/Headers/amdgcn-openmp-device-math-complex.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK
+
+#include 
+
+void test_complex_f64(double _Complex a) {
+// CHECK-LABEL: define {{.*}}test_complex_f64
+#pragma omp target
+  {
+// CHECK: call { double, double } @__divdc3
+// CHECK: call { double, double } @__muldc3
+(void)(a * (a / a));
+  }
+}
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call double @__ocml_fabs_f64(
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isfinite_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+// CHECK-DAG: call double @__ocml_scalbn_f64(
+// CHECK-DAG: call double @__ocml_logb_f64(
+
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isinf_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+
+void test_complex_f32(float _Complex a) {
+// CHECK-LABEL: define {{.*}}test_complex_f32
+#pragma omp target
+  {
+// CHECK: call [2 x i32] @__divsc3
+// CHECK: call [2 x i32] @__mulsc3
+(void)(a * (a / a));
+  }
+}
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call float @__ocml_fabs_f32(
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isfinite_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call float @__ocml_scalbn_f32(
+// CHECK-DAG: call float @__ocml_logb_f32(
+
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isinf_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
Index: clang/lib/Headers/openmp_wrappers/complex.h
===
--- clang/lib/Headers/openmp_wrappers/complex.h
+++ clang/lib/Headers/openmp_wrappers/complex.h
@@ -17,10 +17,19 @@
 // We require math functions in the complex builtins below.
 #include 
 
+#ifdef __NVPTX__
 #define __OPENMP_NVPTX__
 #include <__clang_cuda_complex_builtins.h>
 #undef __OPENMP_NVPTX__
 #endif
 
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif
+
+#endif
+
 // Grab the host header too.
 #include_next 
Index: clang/lib/Headers/openmp_wrappers/complex
===
--- clang/lib/Headers/openmp_wrappers/complex
+++ clang/lib/Headers/openmp_wrappers/complex
@@ -17,9 +17,18 @@
 // We require std::math functions in the complex builtins below.
 #include 
 
+#ifdef __NVPTX__
 #define __OPENMP_NVPTX__
 #include <__clang_cuda_complex_builtins.h>
 #undef __OPENMP_NVPTX__
+#endif // __NVPTX__
+
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif // __AMDGCN__
+
 #endif
 
 // Grab the host header too.
@@ -43,4 +52,4 @@
 
 #pragma omp end declare variant
 
-#endif
+#endif // _LIBCPP_STD_VER
Index: clang/lib/Headers/__clang_cuda_complex_builtins.h
===
--- clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -16,7 +16,7 @@
 // to work with CUDA and OpenMP target offloading [in C and C++ mode].)
 
 #pragma push_macro("__DEVICE__")
-#ifdef __OPENMP_NVPTX__
+#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
 #pragma omp declare target
 #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
 #else
@@ -26,7 +26,7 @@
 // To make the algorithms available for C and C++ in CUDA and OpenMP we select
 // different but equivalent function versions. TODO: For OpenMP we 

[PATCH] D107952: [AMDGPU][OpenMP] Use llvm-link to link ocml libraries

2021-08-12 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain.c:80
+// CHECK-LIB-DEVICE: 
{{.*}}llvm-link{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+// "llvm-link" "/tmp/amdgpu-openmp-toolchain-e9dcd9.bc" 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/ocml.bc"
 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/ockl.bc"
 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_daz_opt_on.bc"
 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_unsafe_math_off.bc"
 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_finite_only_off.bc"
 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc"
 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_wavefrontsize64_on.bc"
 
"/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_isa_version_803.bc"
 "-o" "/tmp/amdgpu-openmp-toolchain-e9dcd9-gfx803-linked-4095ab.bc"

JonChesterfield wrote:
> This will fail on machines with a different directory layout, perhaps copy 
> the `{{.*}}` pattern from above
This line was accidentally left in; it was only a comment used to check the actual 'CHECK' line.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107952/new/

https://reviews.llvm.org/D107952

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107952: [AMDGPU][OpenMP] Use llvm-link to link ocml libraries

2021-08-12 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 365965.
pdhaliwal added a comment.

Remove redundant test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107952/new/

https://reviews.llvm.org/D107952

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -76,4 +76,4 @@
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
 
 // RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
-// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc" "-mlink-builtin-bitcode"{{.*}}ockl.bc" "-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
+// CHECK-LIB-DEVICE: {{.*}}llvm-link{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -16,6 +16,10 @@
 namespace clang {
 namespace driver {
 
+namespace toolchains {
+class AMDGPUOpenMPToolChain;
+}
+
 namespace tools {
 
 namespace AMDGCN {
@@ -35,11 +39,11 @@
 
 private:
   /// \return llvm-link output file name.
-  const char *constructLLVMLinkCommand(Compilation , const JobAction ,
-   const InputInfoList ,
-   const llvm::opt::ArgList ,
-   llvm::StringRef SubArchName,
-   llvm::StringRef OutputFilePrefix) const;
+  const char *constructLLVMLinkCommand(
+  const toolchains::AMDGPUOpenMPToolChain , Compilation ,
+  const JobAction , const InputInfoList ,
+  const llvm::opt::ArgList , llvm::StringRef SubArchName,
+  llvm::StringRef OutputFilePrefix) const;
 
   /// \return llc output file name.
   const char *constructLlcCommand(Compilation , const JobAction ,
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -86,14 +86,34 @@
 } // namespace
 
 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
-Compilation , const JobAction , const InputInfoList ,
-const ArgList , StringRef SubArchName,
-StringRef OutputFilePrefix) const {
+const toolchains::AMDGPUOpenMPToolChain , Compilation ,
+const JobAction , const InputInfoList , const ArgList ,
+StringRef SubArchName, StringRef OutputFilePrefix) const {
   ArgStringList CmdArgs;
 
   for (const auto  : Inputs)
 if (II.isFilename())
   CmdArgs.push_back(II.getFilename());
+
+  if (Args.hasArg(options::OPT_l)) {
+auto Lm = Args.getAllArgValues(options::OPT_l);
+bool HasLibm = false;
+for (auto  : Lm) {
+  if (Lib == "m") {
+HasLibm = true;
+break;
+  }
+}
+
+if (HasLibm) {
+  SmallVector BCLibs =
+  AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
+  llvm::for_each(BCLibs, [&](StringRef BCFile) {
+CmdArgs.push_back(Args.MakeArgString(BCFile));
+  });
+}
+  }
+
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =
@@ -182,8 +202,8 @@
   assert(Prefix.length() && "no linker inputs are files ");
 
   // Each command outputs different files.
-  const char *LLVMLinkCommand =
-  constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
+  const char *LLVMLinkCommand = constructLLVMLinkCommand(
+  AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
 
   // Produce readable assembly if save-temps is enabled.
   if (C.getDriver().isSaveTempsEnabled())
@@ -234,27 +254,6 @@
 
   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
  getTriple());
-
-  if (!DriverArgs.hasArg(options::OPT_l))
-return;
-
-  

[PATCH] D107952: [AMDGPU][OpenMP] Use llvm-link to link ocml libraries

2021-08-12 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: ronlieb, JonChesterfield.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.

This fixes the 'unused linker option: -lm' warning when compiling a
program with -c.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107952

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -76,4 +76,5 @@
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
 
 // RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
-// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc" "-mlink-builtin-bitcode"{{.*}}ockl.bc" "-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
+// CHECK-LIB-DEVICE: {{.*}}llvm-link{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+// "llvm-link" "/tmp/amdgpu-openmp-toolchain-e9dcd9.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/ocml.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/ockl.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_daz_opt_on.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_unsafe_math_off.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_finite_only_off.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_wavefrontsize64_on.bc" "/home/prince/source/repos/llvm-project/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_isa_version_803.bc" "-o" "/tmp/amdgpu-openmp-toolchain-e9dcd9-gfx803-linked-4095ab.bc"
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -16,6 +16,10 @@
 namespace clang {
 namespace driver {
 
+namespace toolchains {
+class AMDGPUOpenMPToolChain;
+}
+
 namespace tools {
 
 namespace AMDGCN {
@@ -35,11 +39,11 @@
 
 private:
   /// \return llvm-link output file name.
-  const char *constructLLVMLinkCommand(Compilation , const JobAction ,
-   const InputInfoList ,
-   const llvm::opt::ArgList ,
-   llvm::StringRef SubArchName,
-   llvm::StringRef OutputFilePrefix) const;
+  const char *constructLLVMLinkCommand(
+  const toolchains::AMDGPUOpenMPToolChain , Compilation ,
+  const JobAction , const InputInfoList ,
+  const llvm::opt::ArgList , llvm::StringRef SubArchName,
+  llvm::StringRef OutputFilePrefix) const;
 
   /// \return llc output file name.
   const char *constructLlcCommand(Compilation , const JobAction ,
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -86,14 +86,34 @@
 } // namespace
 
 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
-Compilation , const JobAction , const InputInfoList ,
-const ArgList , StringRef SubArchName,
-StringRef OutputFilePrefix) const {
+const toolchains::AMDGPUOpenMPToolChain , Compilation ,
+const JobAction , const InputInfoList , const ArgList ,
+StringRef SubArchName, StringRef OutputFilePrefix) const {
   ArgStringList CmdArgs;
 
   for (const auto  : Inputs)
 if (II.isFilename())
   

[PATCH] D107468: [AMDGPU][OpenMP] Wrap amdgcn declare variant inside ifdef

2021-08-04 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: JonChesterfield, ye-luo, ronlieb.
Herald added subscribers: guansong, t-tye, tpr, dstuttard, yaxunl, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.

This fixes the issue https://bugs.llvm.org/show_bug.cgi?id=51337


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107468

Files:
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h


Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
===
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -35,6 +35,7 @@
 
 #pragma omp end declare variant
 
+#ifdef __AMDGCN__
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
 
 // Import types which will be used by __clang_hip_libdevice_declares.h
@@ -54,6 +55,7 @@
 #undef __OPENMP_AMDGCN__
 
 #pragma omp end declare variant
+#endif
 
 #ifdef __cplusplus
 } // extern "C"


Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
===
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -35,6 +35,7 @@
 
 #pragma omp end declare variant
 
+#ifdef __AMDGCN__
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
 
 // Import types which will be used by __clang_hip_libdevice_declares.h
@@ -54,6 +55,7 @@
 #undef __OPENMP_AMDGCN__
 
 #pragma omp end declare variant
+#endif
 
 #ifdef __cplusplus
 } // extern "C"
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107468: [AMDGPU][OpenMP] Wrap amdgcn declare variant inside ifdef

2021-08-04 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf3eb5f900d2a: [AMDGPU][OpenMP] Wrap amdgcn declare variant 
inside ifdef (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107468/new/

https://reviews.llvm.org/D107468

Files:
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h


Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
===
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -35,6 +35,7 @@
 
 #pragma omp end declare variant
 
+#ifdef __AMDGCN__
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
 
 // Import types which will be used by __clang_hip_libdevice_declares.h
@@ -54,6 +55,7 @@
 #undef __OPENMP_AMDGCN__
 
 #pragma omp end declare variant
+#endif
 
 #ifdef __cplusplus
 } // extern "C"


Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
===
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -35,6 +35,7 @@
 
 #pragma omp end declare variant
 
+#ifdef __AMDGCN__
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
 
 // Import types which will be used by __clang_hip_libdevice_declares.h
@@ -54,6 +55,7 @@
 #undef __OPENMP_AMDGCN__
 
 #pragma omp end declare variant
+#endif
 
 #ifdef __cplusplus
 } // extern "C"
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-08-04 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: 
clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h:38
+
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+

JonChesterfield wrote:
> Given that declare variant didn't work elsewhere, it probably doesn't work 
> here. Thus this may be the root cause of 
> https://bugs.llvm.org/show_bug.cgi?id=51337
I was able to reproduce this issue locally on an nvptx machine. And you are right, 
declare variant didn't work here either. Wrapping it in #ifdef fixed the 
issue. I will create a fix.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-08-02 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG713a5d12cde5: [OpenMP][AMDGCN] Initial math headers support 
(authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include 
+#else
+#include 
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -27,3 +27,4 @@
 double abs(double __x) { return fabs(__x); }
 
 }
+
Index: 

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-08-02 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

@ye-luo and @JonChesterfield can you please test the latest version of this 
patch? It should work now.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-08-02 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 363387.
pdhaliwal added a comment.

Fixed a compilation error in the nvptx headers. Tested on both CUDA and non-CUDA 
systems.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include 
+#else
+#include 
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -27,3 +27,4 @@
 double abs(double __x) { return fabs(__x); }
 
 }
+
Index: clang/test/Headers/Inputs/include/algorithm

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-07-30 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG12da97ea10a9: [OpenMP][AMDGCN] Initial math headers support 
(authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include 
+#else
+#include 
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -27,3 +27,4 @@
 double abs(double __x) { return fabs(__x); }
 
 }
+
Index: 

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-07-30 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 363090.
pdhaliwal added a comment.

Addressed review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -27,3 +27,4 @@
 double abs(double __x) { return fabs(__x); }
 
 }
+
Index: clang/test/Headers/Inputs/include/algorithm
===
--- /dev/null
+++ 

[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-30 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG9830f902e4d0: [AMDGPU][OpenMP] Support linking of math 
libraries (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -74,3 +74,6 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
+
+// RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
+// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc" "-mlink-builtin-bitcode"{{.*}}ockl.bc" "-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/HIP.cpp
===
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -395,35 +395,8 @@
 }
 StringRef GpuArch = getGPUArch(DriverArgs);
 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
-(void)GpuArch;
-auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
-const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-
-std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
-if (LibDeviceFile.empty()) {
-  getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
-  return {};
-}
 
 // If --hip-device-lib is not set, add the default bitcode libraries.
-// TODO: There are way too many flags that change this. Do we need to check
-// them all?
-bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
-  options::OPT_fno_gpu_flush_denormals_to_zero,
-  getDefaultDenormsAreZeroForTarget(Kind));
-bool FiniteOnly =
-DriverArgs.hasFlag(options::OPT_ffinite_math_only,
-   options::OPT_fno_finite_math_only, false);
-bool UnsafeMathOpt =
-DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
-   options::OPT_fno_unsafe_math_optimizations, false);
-bool FastRelaxedMath = DriverArgs.hasFlag(
-options::OPT_ffast_math, options::OPT_fno_fast_math, false);
-bool CorrectSqrt = DriverArgs.hasFlag(
-options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
-options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
-bool Wave64 = isWave64(DriverArgs, Kind);
-
 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
   auto AsanRTL = RocmInstallation.getAsanRTLPath();
@@ -442,10 +415,8 @@
 // Add the HIP specific bitcode library.
 BCLibs.push_back(RocmInstallation.getHIPPath().str());
 
-// Add the generic set of libraries.
-BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
-DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
-FastRelaxedMath, CorrectSqrt));
+// Add common device libraries like ocml etc.
+BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
 
 // Add instrument lib.
 auto InstLib =
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -9,12 +9,14 @@
 #include "AMDGPUOpenMP.h"
 #include "AMDGPU.h"
 #include "CommonArgs.h"
+#include "ToolChains/ROCm.h"
 #include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/InputInfo.h"
 #include 

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-07-30 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 362997.
pdhaliwal added a comment.

It took some work to fix the failing lit test case, and many thanks to
@estewart for helping with that.

The current status is that we now follow the nvptx OpenMP strategy for
OpenMP math headers very closely. In this version of the patch, a number
of HIP cmath overloads are disabled for AMDGPU OpenMP, similar to nvptx.
This fixed the lit failure, but a large number of tests started failing in OvO,
because some overloads that are used in the suite had been disabled. To fix
them, we added definitions in openmp_wrappers/cmath for the missing overloads.
With these changes, OvO compiles 100% of the mathematical_function test suite
successfully. There are still 6/177 tests in the suite which produce wrong
results.

Now my suggestion is to land this patch as it is and fix the remaining 6 tests
in a later patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+  

[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-29 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 362718.
pdhaliwal added a comment.

Missed comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -74,3 +74,6 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
+
+// RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
+// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc" "-mlink-builtin-bitcode"{{.*}}ockl.bc" "-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/HIP.cpp
===
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -395,35 +395,8 @@
 }
 StringRef GpuArch = getGPUArch(DriverArgs);
 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
-(void)GpuArch;
-auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
-const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-
-std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
-if (LibDeviceFile.empty()) {
-  getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
-  return {};
-}
 
 // If --hip-device-lib is not set, add the default bitcode libraries.
-// TODO: There are way too many flags that change this. Do we need to check
-// them all?
-bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
-  options::OPT_fno_gpu_flush_denormals_to_zero,
-  getDefaultDenormsAreZeroForTarget(Kind));
-bool FiniteOnly =
-DriverArgs.hasFlag(options::OPT_ffinite_math_only,
-   options::OPT_fno_finite_math_only, false);
-bool UnsafeMathOpt =
-DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
-   options::OPT_fno_unsafe_math_optimizations, false);
-bool FastRelaxedMath = DriverArgs.hasFlag(
-options::OPT_ffast_math, options::OPT_fno_fast_math, false);
-bool CorrectSqrt = DriverArgs.hasFlag(
-options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
-options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
-bool Wave64 = isWave64(DriverArgs, Kind);
-
 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
   auto AsanRTL = RocmInstallation.getAsanRTLPath();
@@ -442,10 +415,8 @@
 // Add the HIP specific bitcode library.
 BCLibs.push_back(RocmInstallation.getHIPPath().str());
 
-// Add the generic set of libraries.
-BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
-DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
-FastRelaxedMath, CorrectSqrt));
+// Add common device libraries like ocml etc.
+BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
 
 // Add instrument lib.
 auto InstLib =
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -9,12 +9,14 @@
 #include "AMDGPUOpenMP.h"
 #include "AMDGPU.h"
 #include "CommonArgs.h"
+#include "ToolChains/ROCm.h"
 #include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/InputInfo.h"
 #include "clang/Driver/Options.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/FileSystem.h"
 

[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-29 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 362717.
pdhaliwal added a comment.

Rename method to getCommonDeviceLibNames


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -74,3 +74,6 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
+
+// RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
+// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc" "-mlink-builtin-bitcode"{{.*}}ockl.bc" "-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/HIP.cpp
===
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -395,35 +395,8 @@
 }
 StringRef GpuArch = getGPUArch(DriverArgs);
 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
-(void)GpuArch;
-auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
-const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-
-std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
-if (LibDeviceFile.empty()) {
-  getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
-  return {};
-}
 
 // If --hip-device-lib is not set, add the default bitcode libraries.
-// TODO: There are way too many flags that change this. Do we need to check
-// them all?
-bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
-  options::OPT_fno_gpu_flush_denormals_to_zero,
-  getDefaultDenormsAreZeroForTarget(Kind));
-bool FiniteOnly =
-DriverArgs.hasFlag(options::OPT_ffinite_math_only,
-   options::OPT_fno_finite_math_only, false);
-bool UnsafeMathOpt =
-DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
-   options::OPT_fno_unsafe_math_optimizations, false);
-bool FastRelaxedMath = DriverArgs.hasFlag(
-options::OPT_ffast_math, options::OPT_fno_fast_math, false);
-bool CorrectSqrt = DriverArgs.hasFlag(
-options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
-options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
-bool Wave64 = isWave64(DriverArgs, Kind);
-
 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
   auto AsanRTL = RocmInstallation.getAsanRTLPath();
@@ -442,10 +415,8 @@
 // Add the HIP specific bitcode library.
 BCLibs.push_back(RocmInstallation.getHIPPath().str());
 
-// Add the generic set of libraries.
-BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
-DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
-FastRelaxedMath, CorrectSqrt));
+// Add common device libraries like ocml etc.
+BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
 
 // Add instrument lib.
 auto InstLib =
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -9,12 +9,14 @@
 #include "AMDGPUOpenMP.h"
 #include "AMDGPU.h"
 #include "CommonArgs.h"
+#include "ToolChains/ROCm.h"
 #include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/InputInfo.h"
 #include "clang/Driver/Options.h"
+#include "llvm/ADT/STLExtras.h"
 #include 

[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-28 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Due to the current state of the math headers, I was unable to test this patch
without ockl. But the last time the headers were working, I was actually
required to link ockl for a symbol (I forgot the name). I will update once I am
able to get the math headers working again.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-27 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/lib/Driver/ToolChains/AMDGPU.cpp:923-924
+  bool CorrectSqrt = DriverArgs.hasFlag(
+  options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
+  options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
+  bool Wave64 = isWave64(DriverArgs, Kind);

I wanted to rename these to something generic like -fgpu-fp32, but for
some reason aliasing wasn't working. Anyhow, my suggestion is to make
that change in a separate patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-27 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 361967.
pdhaliwal added a comment.

Extract the options from HIP/OpenMP to a common method in base class.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -74,3 +74,6 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
+
+// RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
+// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc" "-mlink-builtin-bitcode"{{.*}}ockl.bc" "-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/HIP.cpp
===
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -395,35 +395,8 @@
 }
 StringRef GpuArch = getGPUArch(DriverArgs);
 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
-(void)GpuArch;
-auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
-const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-
-std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
-if (LibDeviceFile.empty()) {
-  getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
-  return {};
-}
 
 // If --hip-device-lib is not set, add the default bitcode libraries.
-// TODO: There are way too many flags that change this. Do we need to check
-// them all?
-bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
-  options::OPT_fno_gpu_flush_denormals_to_zero,
-  getDefaultDenormsAreZeroForTarget(Kind));
-bool FiniteOnly =
-DriverArgs.hasFlag(options::OPT_ffinite_math_only,
-   options::OPT_fno_finite_math_only, false);
-bool UnsafeMathOpt =
-DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
-   options::OPT_fno_unsafe_math_optimizations, false);
-bool FastRelaxedMath = DriverArgs.hasFlag(
-options::OPT_ffast_math, options::OPT_fno_fast_math, false);
-bool CorrectSqrt = DriverArgs.hasFlag(
-options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
-options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
-bool Wave64 = isWave64(DriverArgs, Kind);
-
 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
   auto AsanRTL = RocmInstallation.getAsanRTLPath();
@@ -442,10 +415,8 @@
 // Add the HIP specific bitcode library.
 BCLibs.push_back(RocmInstallation.getHIPPath().str());
 
-// Add the generic set of libraries.
-BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
-DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
-FastRelaxedMath, CorrectSqrt));
+// Add common device libraries like ocml etc.
+BCLibs.append(getCommonDeviceLibOptions(DriverArgs, GpuArch.str()));
 
 // Add instrument lib.
 auto InstLib =
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -10,11 +10,13 @@
 #include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "InputInfo.h"
+#include "ToolChains/ROCm.h"
 #include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
+#include "llvm/ADT/STLExtras.h"
 #include 

[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-14 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 358614.
pdhaliwal added a comment.

Move linking logic to a common method.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -74,3 +74,6 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
+
+// RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
+// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc" "-mlink-builtin-bitcode"{{.*}}ockl.bc" "-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_off.bc" "-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" "-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -10,6 +10,7 @@
 #include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "InputInfo.h"
+#include "ToolChains/ROCm.h"
 #include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
@@ -225,6 +226,22 @@
   std::string BitcodeSuffix = "amdgcn-" + GPUArch;
   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
  getTriple());
+
+  if (!DriverArgs.hasArg(options::OPT_l))
+return;
+
+  auto Lm = DriverArgs.getAllArgValues(options::OPT_l);
+  bool HasLibm = false;
+  for (auto &Lib : Lm) {
+if (Lib == "m") {
+  HasLibm = true;
+  break;
+}
+  }
+
+  if (HasLibm) {
+ROCMToolChain::addDeviceLibOptions(DriverArgs, CC1Args, GPUArch);
+  }
 }
 
 llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
Index: clang/lib/Driver/ToolChains/AMDGPU.h
===
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -136,6 +136,10 @@
   addClangTargetOptions(const llvm::opt::ArgList ,
 llvm::opt::ArgStringList ,
 Action::OffloadKind DeviceOffloadKind) const override;
+
+  void addDeviceLibOptions(const llvm::opt::ArgList ,
+   llvm::opt::ArgStringList ,
+   const std::string ) const;
 };
 
 } // end namespace toolchains
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -826,43 +826,12 @@
 return;
   }
 
-  // Get the device name and canonicalize it
+  // Get the device name
   const StringRef GpuArch = getGPUArch(DriverArgs);
-  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
-  const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-  std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
-  if (LibDeviceFile.empty()) {
-getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
-return;
-  }
-
-  bool Wave64 = isWave64(DriverArgs, Kind);
-
-  // TODO: There are way too many flags that change this. Do we need to check
-  // them all?
-  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
- getDefaultDenormsAreZeroForTarget(Kind);
-  bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
-
-  bool UnsafeMathOpt =
-  DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
-  bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
-  bool CorrectSqrt =
-  DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
-
-  // Add the OpenCL specific bitcode library.
-  llvm::SmallVector BCLibs;
-  BCLibs.push_back(RocmInstallation.getOpenCLPath().str());
-
-  // Add the generic set of libraries.
-  

[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-14 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp:252
+bool Wave64 = isWave64(DriverArgs, Kind);
+
+// TODO: There are way too many flags that change this. Do we need to check

JonChesterfield wrote:
> I recognise this comment. Is this a bunch of logic that can be moved into the 
> base class and then called from here and hip?
This is copied (after removing stuff related to opencl) from 
https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/AMDGPU.cpp#L841
 
I wanted to call `ROCMToolChain::addClangTargetOptions`, but it contains 
some extra logic that is irrelevant to OpenMP. I will move the library 
linking into a separate common method as you suggest.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105981/new/

https://reviews.llvm.org/D105981

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105981: [AMDGPU][OpenMP] Support linking of math libraries

2021-07-14 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: JonChesterfield, ronlieb, jdoerfert.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

Math libraries are linked only when -lm is specified. This is because
the host system could be missing rocm-device-libs.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D105981

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -74,3 +74,6 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
+
+// RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target 
x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm 
--rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-LIB-DEVICE
+// CHECK-LIB-DEVICE: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}"-mlink-builtin-bitcode"{{.*}}ocml.bc"
 "-mlink-builtin-bitcode"{{.*}}ockl.bc" 
"-mlink-builtin-bitcode"{{.*}}oclc_daz_opt_on.bc" 
"-mlink-builtin-bitcode"{{.*}}oclc_unsafe_math_off.bc" 
"-mlink-builtin-bitcode"{{.*}}oclc_finite_only_off.bc" 
"-mlink-builtin-bitcode"{{.*}}oclc_correctly_rounded_sqrt_off.bc" 
"-mlink-builtin-bitcode"{{.*}}oclc_wavefrontsize64_on.bc" 
"-mlink-builtin-bitcode"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -10,6 +10,7 @@
 #include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "InputInfo.h"
+#include "ToolChains/ROCm.h"
 #include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
@@ -225,6 +226,53 @@
   std::string BitcodeSuffix = "amdgcn-" + GPUArch;
   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
  getTriple());
+
+  if (!DriverArgs.hasArg(options::OPT_l))
+return;
+
+  auto Lm = DriverArgs.getAllArgValues(options::OPT_l);
+  bool HasLibm = false;
+  for (auto &Lib : Lm) {
+if (Lib == "m") {
+  HasLibm = true;
+  break;
+}
+  }
+
+  if (HasLibm) {
+auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
+const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
+std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
+if (LibDeviceFile.empty()) {
+  getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
+  return;
+}
+
+bool Wave64 = isWave64(DriverArgs, Kind);
+
+// TODO: There are way too many flags that change this. Do we need to check
+// them all?
+bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+  getDefaultDenormsAreZeroForTarget(Kind);
+bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
+
+bool UnsafeMathOpt =
+DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
+bool FastRelaxedMath = 
DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
+bool CorrectSqrt =
+DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
+
+// Add the generic set of libraries.
+llvm::SmallVector BCLibs;
+BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
+DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
+FastRelaxedMath, CorrectSqrt));
+
+llvm::for_each(BCLibs, [&](StringRef BCFile) {
+  CC1Args.push_back("-mlink-builtin-bitcode");
+  CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
+});
+  }
 }
 
 llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -74,3 +74,6 @@
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
+
+// 

[PATCH] D105221: [openmp][nfc] Simplify macros guarding math complex headers

2021-07-14 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Looks ok to me. Regression tests and runtime tests went fine. I tested a simple 
CUDA and OpenMP kernel using the `sin` function on sm_61 and didn't see any issues.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105221/new/

https://reviews.llvm.org/D105221

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105221: [openmp][nfc] Simplify macros guarding math complex headers

2021-07-01 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Should the name of the file be changed as well?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105221/new/

https://reviews.llvm.org/D105221

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-30 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/lib/Headers/__clang_hip_cmath.h:96
+__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
 

jdoerfert wrote:
> ^ This is how OpenMP resolves the overload issue wrt. different return types.
I tried it the exact same way. The lit tests compile and run fine, but I could 
not get the runtime tests to compile without errors. It might be that I am not 
using the match patterns correctly. I also tried some other combinations of the 
match selector, but none of them worked.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-28 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/lib/Headers/__clang_hip_cmath.h:30
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static __attribute__((always_inline, nothrow))
+#define __CONSTEXPR__ constexpr

ashi1 wrote:
> Does OpenMP not require `__device__` attribute here? I know constexpr defines 
> `__device__` on HIP, does OMP do the same?
It does not, as these methods are inside a `declare variant` block.



Comment at: clang/lib/Headers/__clang_hip_cmath.h:32
+#define __CONSTEXPR__ constexpr
+#define __constant__ __attribute__((constant))
+#else

ashi1 wrote:
> I don't think this is the right place to define `__constant__`? It's unused 
> in this header, and may get forgotten. Would it be better to define it in the 
> openmp wrapper or does cmath define it in OpenMP?
It is being used. However, I have moved it to openmp_wrappers/cmath.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-28 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 354913.
pdhaliwal marked 2 inline comments as done.
pdhaliwal added a comment.

- Move __constant__ to openmp_wrappers/cmath
- Using push/pop_macro to avoid redefinition


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -23,9 +23,13 @@
 inline long long
 abs(long long __x) { return __builtin_llabs (__x); }
 
+// amdgcn already provides 

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-28 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 354802.
pdhaliwal added a comment.

Typo


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -23,9 +23,13 @@
 inline long long
 abs(long long __x) { return __builtin_llabs (__x); }
 
+// amdgcn already provides definition of fabs
+#ifndef __AMDGCN__
 float fabs(float __x) { return __builtin_fabs(__x); }
+#endif
 
 float abs(float __x) { return 

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-28 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/lib/Headers/__clang_hip_math.h:29
+#else
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#endif

JonChesterfield wrote:
> wonder if HIP would benefit from nothrow here
I would like to keep HIP changes minimal in this patch.



Comment at: clang/lib/Headers/__clang_hip_math.h:35
+#ifdef __OPENMP_AMDGCN__
+#define __RETURN_TYPE int
+#else

jdoerfert wrote:
> JonChesterfield wrote:
> > I'd expect openmp to match the cplusplus/c distinction here, as openmp 
> > works on C source
> ^ Agreed. Though, we use a different trick because it's unfortunately not as 
> straight forward always and can be decided based on the C vs C++.
This is somewhat tricky. Since `__finite`/`__isnan`/`__isinff` are declared 
with an int return type in the standard library (and the corresponding methods in 
C++ seem to be isfinite, isnan and isinf with a bool return type), the compiler 
fails to resolve these functions when using bool. I don't know how HIP is 
working.

__RETURN_TYPE macro is only being used with the following methods:
1. __finite
2. __isnan
3. __isinf
4. __signbit

and with the corresponding float versions.



Comment at: clang/lib/Headers/openmp_wrappers/cmath:83
+#include <__clang_hip_cmath.h>
+#undef __OPENMP_AMDGCN__
+

jdoerfert wrote:
> No match_any needed (here and elsewhere).
> 
> Also, don't we want all but the includes to be the same for both GPUs. Maybe 
> we have a device(kind(gpu)) variant and inside the nvptx and amdgpu just for 
> the respective include?
device(kind(gpu)) breaks nvptx and HIP with lots of errors like the one below:

```
...
__clang_cuda_device_functions.h:29:40: error: use of undeclared identifier 
'__nvvm_vote_all'
...
```

Maybe I am doing something wrong.



Comment at: clang/test/Headers/Inputs/include/cstdlib:15
 
+#ifndef __AMDGCN__
 namespace std

jdoerfert wrote:
> JonChesterfield wrote:
> > I think I'd expect builtin_labs et al to work on amdgcn, are we missing 
> > lowering for them?
> Yeah, looks weird that we cannot compile this mock-up header.
From what I understand, HIP defines fabs in the std namespace using ocml's 
version, and fabs was already defined in this header. That causes a multiple 
declaration error. I will wrap only fabs in the ifdefs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-28 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 354801.
pdhaliwal marked 2 inline comments as done.
pdhaliwal added a comment.

Addressed review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -23,9 +23,13 @@
 inline long long
 abs(long long __x) { return __builtin_llabs (__x); }
 
+// amdgcn already provides definition of fabs
+#ifndef __AMDGCN__
 float fabs(float __x) { return 

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-25 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 354471.
pdhaliwal added a comment.

Fix format errors


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104904/new/

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -12,6 +12,7 @@
 extern float fabs (float __x) __attribute__ ((__const__)) ;
 #endif
 
+#ifndef __AMDGCN__
 namespace std
 {
 
@@ -29,3 +30,5 @@
 double abs(double __x) { return fabs(__x); }
 
 }
+
+#endif
Index: 

[PATCH] D104904: [OpenMP][AMDGCN] Initial math headers support

2021-06-25 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: JonChesterfield, ye-luo, ronlieb, gregrodgers, 
jdoerfert.
Herald added subscribers: guansong, yaxunl, jvesely.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

With this patch, OpenMP on AMDGCN will use the math functions
provided by the ROCm ocml library. Linking device code to ocml will be
done in the next patch.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D104904

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/__clang_hip_cmath.h
  clang/lib/Headers/__clang_hip_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/Inputs/include/algorithm
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/utility
  clang/test/Headers/amdgcn_openmp_device_math.c
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }
Index: clang/test/Headers/amdgcn_openmp_device_math.c
===
--- /dev/null
+++ clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+  // CHECK-LABEL: define {{.*}}test_math_f64
+  #pragma omp target
+  {
+// CHECK: call double @__ocml_sin_f64
+double l1 = sin(x);
+// CHECK: call double @__ocml_cos_f64
+double l2 = cos(x);
+// CHECK: call double @__ocml_fabs_f64
+double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+  // CHECK-LABEL: define {{.*}}test_math_f32
+  #pragma omp target
+  {
+// CHECK-C: call double @__ocml_sin_f64
+// CHECK-CPP: call float @__ocml_sin_f32
+float l1 = sin(x);
+// CHECK-C: call double @__ocml_cos_f64
+// CHECK-CPP: call float @__ocml_cos_f32
+float l2 = cos(x);
+// CHECK-C: call double @__ocml_fabs_f64
+// CHECK-CPP: call float @__ocml_fabs_f32
+float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+  // CHECK-LABEL: define {{.*}}test_math_f32_suffix
+  #pragma omp target
+  {
+// CHECK: call float @__ocml_sin_f32
+float l1 = sinf(x);
+// CHECK: call float @__ocml_cos_f32
+float l2 = cosf(x);
+// CHECK: call float @__ocml_fabs_f32
+float l3 = fabsf(x);
+  }
+}
Index: clang/test/Headers/Inputs/include/utility
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- 

[PATCH] D102065: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S

2021-05-12 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG10c779d2065f: [AMDGPU][OpenMP] Emit textual IR for 
-emit-llvm -S (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102065/new/

https://reviews.llvm.org/D102065

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -6,7 +6,7 @@
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" 
"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906" 
"-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4394,7 +4394,13 @@
   CmdArgs.push_back("-emit-llvm");
 } else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
-  CmdArgs.push_back("-emit-llvm-bc");
+  // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
+  if (Triple.isAMDGCN() && IsOpenMPDevice && Args.hasArg(options::OPT_S) &&
+  Args.hasArg(options::OPT_emit_llvm)) {
+CmdArgs.push_back("-emit-llvm");
+  } else {
+CmdArgs.push_back("-emit-llvm-bc");
+  }
 } else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
   StringRef ArgStr =


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -6,7 +6,7 @@
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- 

[PATCH] D102065: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S

2021-05-12 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Tested on gfx906. The libomptarget tests are working as expected.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102065/new/

https://reviews.llvm.org/D102065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102065: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S

2021-05-12 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 344763.
pdhaliwal added a comment.

Fixed the if-else logic


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102065/new/

https://reviews.llvm.org/D102065

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -6,7 +6,7 @@
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" 
"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906" 
"-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4394,7 +4394,13 @@
   CmdArgs.push_back("-emit-llvm");
 } else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
-  CmdArgs.push_back("-emit-llvm-bc");
+  // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
+  if (Triple.isAMDGCN() && IsOpenMPDevice && Args.hasArg(options::OPT_S) &&
+  Args.hasArg(options::OPT_emit_llvm)) {
+CmdArgs.push_back("-emit-llvm");
+  } else {
+CmdArgs.push_back("-emit-llvm-bc");
+  }
 } else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
   StringRef ArgStr =


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -6,7 +6,7 @@
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4394,7 +4394,13 @@
   

[PATCH] D102065: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S

2021-05-11 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

This is not working as expected and has resulted in broken libomptarget tests.
Reverting this until I find a different fix.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102065/new/

https://reviews.llvm.org/D102065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102065: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S

2021-05-10 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7f78e409d028: [AMDGPU][OpenMP] Emit textual IR for 
-emit-llvm -S (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102065/new/

https://reviews.llvm.org/D102065

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: {{.*}}clang-13" "-cc1" "-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4394,7 +4394,13 @@
   CmdArgs.push_back("-emit-llvm");
 } else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
-  CmdArgs.push_back("-emit-llvm-bc");
+  // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
+  if (Triple.isAMDGCN() && IsOpenMPDevice) {
+if (Args.hasArg(options::OPT_S) && Args.hasArg(options::OPT_emit_llvm))
+  CmdArgs.push_back("-emit-llvm");
+  } else {
+CmdArgs.push_back("-emit-llvm-bc");
+  }
 } else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
   StringRef ArgStr =


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: {{.*}}clang-13" "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4394,7 +4394,13 @@
   CmdArgs.push_back("-emit-llvm");
 } else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
-  CmdArgs.push_back("-emit-llvm-bc");
+  // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
+  if (Triple.isAMDGCN() && IsOpenMPDevice) {
+if (Args.hasArg(options::OPT_S) && Args.hasArg(options::OPT_emit_llvm))
+  CmdArgs.push_back("-emit-llvm");
+  } else {
+CmdArgs.push_back("-emit-llvm-bc");
+  }
 } else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
   StringRef ArgStr =
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102067: [amdgpu-arch] Guard hsa.h with __has_include

2021-05-10 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc711aa0f6f9d: [amdgpu-arch] Guard hsa.h with __has_include 
(authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102067/new/

https://reviews.llvm.org/D102067

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp


Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,7 +11,24 @@
 //
 
//===--===//
 
-#include 
+#if defined(__has_include)
+#if __has_include("hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa.h"
+#elif __has_include("hsa/hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa/hsa.h"
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+
+#if !HSA_HEADER_FOUND
+int main() { return 1; }
+#else
+
 #include 
 #include 
 
@@ -57,3 +74,5 @@
   hsa_shut_down();
   return 0;
 }
+
+#endif


Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,7 +11,24 @@
 //
 //===--===//
 
-#include 
+#if defined(__has_include)
+#if __has_include("hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa.h"
+#elif __has_include("hsa/hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa/hsa.h"
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+
+#if !HSA_HEADER_FOUND
+int main() { return 1; }
+#else
+
 #include 
 #include 
 
@@ -57,3 +74,5 @@
   hsa_shut_down();
   return 0;
 }
+
+#endif
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102067: [amdgpu-arch] Guard hsa.h with __has_include

2021-05-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 343668.
pdhaliwal added a comment.

Added a fallback in case __has_include is not defined or the header is not
found anywhere.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102067/new/

https://reviews.llvm.org/D102067

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp


Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,7 +11,24 @@
 //
 
//===--===//
 
-#include 
+#if defined(__has_include)
+#if __has_include("hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa.h"
+#elif __has_include("hsa/hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa/hsa.h"
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+
+#if !HSA_HEADER_FOUND
+int main() { return 1; }
+#else
+
 #include 
 #include 
 
@@ -57,3 +74,5 @@
   hsa_shut_down();
   return 0;
 }
+
+#endif


Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,7 +11,24 @@
 //
 //===--===//
 
-#include 
+#if defined(__has_include)
+#if __has_include("hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa.h"
+#elif __has_include("hsa/hsa.h")
+#define HSA_HEADER_FOUND 1
+#include "hsa/hsa.h"
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+#else
+#define HSA_HEADER_FOUND 0
+#endif
+
+#if !HSA_HEADER_FOUND
+int main() { return 1; }
+#else
+
 #include 
 #include 
 
@@ -57,3 +74,5 @@
   hsa_shut_down();
   return 0;
 }
+
+#endif
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-05-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I have put up a patch, D102067, which uses __has_include as a workaround for
the header-not-found issue. @davezarzycki, can you check if this resolves the
issue?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102067: [amdgpu-arch] Guard hsa.h with __has_include

2021-05-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: jdoerfert, JonChesterfield, ronlieb, davezarzycki.
Herald added subscribers: kerbowa, t-tye, tpr, dstuttard, yaxunl, nhaehnle, 
jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, wdng.
Herald added a project: clang.

This patch is supposed to fix the issue of hsa.h not being found.
The issue was reported in D99949.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D102067

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp


Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,7 +11,16 @@
 //
 
//===--===//
 
-#include 
+#if defined(__has_include)
+#if __has_include("hsa.h")
+#include "hsa.h"
+#elif __has_include("hsa/hsa.h")
+#include "hsa/hsa.h"
+#endif
+#else
+#include "hsa.h"
+#endif
+
 #include 
 #include 
 


Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,7 +11,16 @@
 //
 //===--===//
 
-#include 
+#if defined(__has_include)
+#if __has_include("hsa.h")
+#include "hsa.h"
+#elif __has_include("hsa/hsa.h")
+#include "hsa/hsa.h"
+#endif
+#else
+#include "hsa.h"
+#endif
+
 #include 
 #include 
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102065: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S

2021-05-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:4397
JA.getType() == types::TY_LTO_BC) {
-  CmdArgs.push_back("-emit-llvm-bc");
+  // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
+  if (Triple.isAMDGCN() && IsOpenMPDevice) {

JonChesterfield wrote:
> Does unconditionally using emit-llvm here achieve the same result?
Nope, it would start producing textual IR even when -emit-llvm -S was not given.
This could be problematic since we have always been producing .bc files,
including our libraries.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102065/new/

https://reviews.llvm.org/D102065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102065: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S

2021-05-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: jdoerfert, JonChesterfield, ronlieb, gregrodgers.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

Previously clang would print a binary blob into the bundled file
for amdgcn. With this patch, it will instead print textual IR as
expected.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D102065

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: {{.*}}clang-13" "-cc1" "-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4394,7 +4394,13 @@
   CmdArgs.push_back("-emit-llvm");
 } else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
-  CmdArgs.push_back("-emit-llvm-bc");
+  // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
+  if (Triple.isAMDGCN() && IsOpenMPDevice) {
+if (Args.hasArg(options::OPT_S) && Args.hasArg(options::OPT_emit_llvm))
+  CmdArgs.push_back("-emit-llvm");
+  } else {
+CmdArgs.push_back("-emit-llvm-bc");
+  }
 } else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
   StringRef ArgStr =


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -71,3 +71,6 @@
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
 // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
+// CHECK-EMIT-LLVM-IR: {{.*}}clang-13" "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4394,7 +4394,13 @@
   CmdArgs.push_back("-emit-llvm");
 } else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
-  CmdArgs.push_back("-emit-llvm-bc");
+  // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
+  if (Triple.isAMDGCN() && IsOpenMPDevice) {
+if (Args.hasArg(options::OPT_S) && Args.hasArg(options::OPT_emit_llvm))
+  CmdArgs.push_back("-emit-llvm");
+  } else {
+CmdArgs.push_back("-emit-llvm-bc");
+  }
 } else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
   StringRef ArgStr =
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-05-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Greg was also interested in having a PCI IDs table in amdgpu-arch, and keeping 
this table inside the target/amdgpu directory sounds like a good idea. Overall, 
I agree with not having a dependency on HSA, as it has caused many issues.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-05-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I could not find anything in the cmake files which could point to the issue 
mentioned here. @davezarzycki, are you on fedora/redhat?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D101926: [amdgpu-arch] Fix rpath to run from build dir

2021-05-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal accepted this revision.
pdhaliwal added a comment.
This revision is now accepted and ready to land.

Looks good to me.




Comment at: clang/tools/amdgpu-arch/CMakeLists.txt:17
 
+set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
+

Curious, does this fix the linking issue mentioned in 
https://reviews.llvm.org/D99949#2739522?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101926/new/

https://reviews.llvm.org/D101926

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-05-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I am investigating the find_package issue.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D101901: [AMDGPU][OpenMP] Fix clang driver crash when provided -c

2021-05-05 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1f5cacfcb845: [AMDGPU][OpenMP] Fix clang driver crash when 
provided -c (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101901/new/

https://reviews.llvm.org/D101901

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -26,12 +26,14 @@
 // CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
 // CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
 // CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, 
"device-openmp (amdgcn-amd-amdhsa)" {7}, ir
-// CHECK-PHASES: 9: linker, {8}, image, (device-openmp)
-// CHECK-PHASES: 10: offload, "device-openmp (amdgcn-amd-amdhsa)" {9}, image
-// CHECK-PHASES: 11: clang-offload-wrapper, {10}, ir, (host-openmp)
-// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
-// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
-// CHECK-PHASES: 14: linker, {4, 13}, image, (host-openmp)
+// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
+// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
+// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
+// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
+// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
+// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
+// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
+// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
 // handling of --libomptarget-amdgcn-bc-path
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 
--libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
 %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
@@ -58,3 +60,14 @@
 // RUN:   | FileCheck %s --check-prefix=CHECK-SAVE-ASM
 // CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=asm" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.s"
 // CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.o"
+
+// check the handling of -c
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -ccc-print-bindings -c 
--target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -save-temps %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-C
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang",
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang",{{.*}}output: "[[HOST_BC:.*]]"
+// CHECK-C: "amdgcn-amd-amdhsa" - "clang",{{.*}}output: "[[DEVICE_I:.*]]"
+// CHECK-C: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[DEVICE_I]]", 
"[[HOST_BC]]"]
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
+// CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3100,16 +3100,8 @@
   }
 
   // By default, we produce an action for each device arch.
-  for (unsigned I = 0; I < ToolChains.size(); ++I) {
-Action * = OpenMPDeviceActions[I];
-// AMDGPU does not support linking of object files, so we skip
-// assemble and backend actions to produce LLVM IR.
-if (ToolChains[I]->getTriple().isAMDGCN() &&
-(CurPhase == phases::Assemble || CurPhase == phases::Backend))
-  continue;
-
+  for (Action * : OpenMPDeviceActions)
 A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
-  }
 
   return ABRT_Success;
 }
@@ -4594,6 +4586,25 @@
   if (!T)
 return InputInfo();
 
+  if (BuildingForOffloadDevice &&
+  A->getOffloadingDeviceKind() == Action::OFK_OpenMP) {
+if (TC->getTriple().isAMDGCN()) {
+  // AMDGCN treats backend and assemble actions as no-op because
+  // linker does not support object files.
+  if (const BackendJobAction *BA = dyn_cast(A)) {
+return BuildJobsForAction(C, *BA->input_begin(), TC, BoundArch,
+  AtTopLevel, MultipleArchs, LinkingOutput,
+  CachedResults, TargetDeviceOffloadKind);
+  }
+
+  if (const AssembleJobAction *AA = dyn_cast(A)) {
+return BuildJobsForAction(C, *AA->input_begin(), TC, BoundArch,
+  AtTopLevel, MultipleArchs, LinkingOutput,
+   

[PATCH] D101901: [AMDGPU][OpenMP] Fix clang driver crash when provided -c

2021-05-05 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: JonChesterfield, ronlieb, jdoerfert, ye-luo, 
tianshilei1992.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

The offload action is used in four different ways, as explained
in Driver.cpp:4495. When -c is present, the final phase is
assemble (linker when -c is not present). However, this phase
is skipped for amdgcn according to D96769.
So, with -c, the offload action ends up in the following situation:

  compile (device) ---> offload ---> offload

Without -c, the chain looks like:

  compile (device) ---> offload ---> linker (device) ---> offload

The former situation creates an unhandled case, which causes the
driver to crash. The solution presented in this patch delays the D96769
logic until job creation time. This keeps the offload action
in one of the four specified situations.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D101901

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -26,12 +26,14 @@
 // CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
 // CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
 // CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, 
"device-openmp (amdgcn-amd-amdhsa)" {7}, ir
-// CHECK-PHASES: 9: linker, {8}, image, (device-openmp)
-// CHECK-PHASES: 10: offload, "device-openmp (amdgcn-amd-amdhsa)" {9}, image
-// CHECK-PHASES: 11: clang-offload-wrapper, {10}, ir, (host-openmp)
-// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
-// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
-// CHECK-PHASES: 14: linker, {4, 13}, image, (host-openmp)
+// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
+// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
+// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
+// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
+// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
+// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
+// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
+// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
 // handling of --libomptarget-amdgcn-bc-path
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 
--libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
 %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
@@ -58,3 +60,14 @@
 // RUN:   | FileCheck %s --check-prefix=CHECK-SAVE-ASM
 // CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=asm" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.s"
 // CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.o"
+
+// check the handling of -c
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -ccc-print-bindings -c 
--target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -save-temps %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-C
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang",
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang",{{.*}}output: "[[HOST_BC:.*]]"
+// CHECK-C: "amdgcn-amd-amdhsa" - "clang",{{.*}}output: "[[DEVICE_I:.*]]"
+// CHECK-C: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[DEVICE_I]]", 
"[[HOST_BC]]"]
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
+// CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
+// CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3100,16 +3100,8 @@
   }
 
   // By default, we produce an action for each device arch.
-  for (unsigned I = 0; I < ToolChains.size(); ++I) {
-Action * = OpenMPDeviceActions[I];
-// AMDGPU does not support linking of object files, so we skip
-// assemble and backend actions to produce LLVM IR.
-if (ToolChains[I]->getTriple().isAMDGCN() &&
-(CurPhase == phases::Assemble || CurPhase == phases::Backend))
-  continue;
-
+  for (Action * : OpenMPDeviceActions)
 A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
-  }

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-21 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG722d4d8e7585: [AMDGPU][OpenMP] Add amdgpu-arch tool to list 
AMD GPUs installed (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, );
+
+  // continue only if device type if GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector *GPUs =
+  static_cast *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, );
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto  : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,24 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: rm -f %t/amdgpu_arch_gfx906
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/
+// RUN: chmod +x %t/amdgpu_arch_gfx906
+// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-21 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 339235.
pdhaliwal added a comment.

Replaced the return commands in the test scripts with exit commands. It seems that
return is handled a bit differently on Fedora/RHEL machines.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, );
+
+  // continue only if device type if GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector *GPUs =
+  static_cast *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, );
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto  : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,24 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: rm -f %t/amdgpu_arch_gfx906
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/
+// RUN: chmod +x %t/amdgpu_arch_gfx906
+// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: llvm-link{{.*}}"-o" 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-20 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3194761d2763: [AMDGPU][OpenMP] Add amdgpu-arch tool to list 
AMD GPUs installed (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, );
+
+  // continue only if device type if GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector *GPUs =
+  static_cast *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, );
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto  : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,24 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: rm -f %t/amdgpu_arch_gfx906
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/
+// RUN: chmod +x %t/amdgpu_arch_gfx906
+// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-20 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 338843.
pdhaliwal marked 2 inline comments as done.
pdhaliwal added a comment.

Review comments addressed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, );
+
+  // continue only if device type if GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector *GPUs =
+  static_cast *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, );
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto  : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,24 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: rm -f %t/amdgpu_arch_gfx906
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/
+// RUN: chmod +x %t/amdgpu_arch_gfx906
+// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-20 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 338810.
pdhaliwal added a comment.

Reopening this. This version is supposed to fix the buildbot failures on PPC 
machines.
Since I don't have a PPC machine, I am not sure if this will work. But the logic
followed here is based on Clang :: Driver/program-path-priority.c, so 
hopefully it will pass the CI.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector<std::string> *GPUs =
+  static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto &GPU : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,24 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: rm -f %t/amdgpu_arch_gfx906
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/
+// RUN: chmod +x %t/amdgpu_arch_gfx906
+// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-16 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I've reverted this from main for now, as there seems to be an issue with
executing the test script on some CI machines.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-15 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7029cffc4e78: [AMDGPU][OpenMP] Add amdgpu-arch tool to list 
AMD GPUs installed (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector<std::string> *GPUs =
+  static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto &GPU : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,15 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
+// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-15 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 337640.
pdhaliwal added a comment.

Rebase and review comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector<std::string> *GPUs =
+  static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto &GPU : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,15 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
+// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
+
+// case when amdgpu_arch returns multiple gpus but of same arch
+// RUN:   %clang -### 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-09 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 336353.
pdhaliwal marked an inline comment as done.
pdhaliwal added a comment.

Fix permissions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector<std::string> *GPUs =
+  static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto &GPU : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,15 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
+// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
+
+// case when amdgpu_arch returns multiple gpus but of same arch
+// RUN:   

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-09 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 336351.
pdhaliwal added a comment.

Added tests for the failing cases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector<std::string> *GPUs =
+  static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto &GPU : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,15 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
+// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
+
+// case when amdgpu_arch returns multiple gpus but of same arch
+// RUN:   %clang -### 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-08 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 336037.
pdhaliwal added a comment.
Herald added subscribers: jansvoboda11, dang.

- Addressed review comments
- Added LIT test case


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector<std::string> *GPUs =
+  static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto &GPU : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,17 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool-path=%S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]" "-fcuda-is-device"
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
+// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
+
+// case when amdgpu_arch returns multiple gpus but of same arch
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

On the testing perspective, the tool




Comment at: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp:208
   assert(GPUArch.startswith("gfx") && "Unsupported sub arch");
+  assert(!GPUArch.empty() && "Unable to detect system GPU");
 

JonChesterfield wrote:
> We shouldn't be handling unknown or missing march= fields with asserts. I see 
> that this is already the case in multiple places, so let's go with a matching 
> assert for this and aspire to fix that in a separate patch.
Matched this one with below.



Comment at: clang/tools/amdgpu-arch/AMDGPUArch.cpp:34
+char GPUName[64];
+Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+if (Status != HSA_STATUS_SUCCESS) {

JonChesterfield wrote:
> Does this null terminate for any length of GPU name? Wondering if we should 
> explicitly zero out the last char.
I checked the ROCr runtime; the output is null-terminated.



Comment at: clang/tools/amdgpu-arch/AMDGPUArch.cpp:45
+if (Status != HSA_STATUS_SUCCESS) {
+  fprintf(stderr, "Unable to initialize HSA\n");
+}

JonChesterfield wrote:
> Unsure these should be writing to stderr. We capture stdout, stderr probably 
> goes to the user. We could exit 1 instead as clang is going to treat any 
> failure to guess the arch identically 
Remove fprintf


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-07 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 335761.
pdhaliwal marked 10 inline comments as done.
pdhaliwal added a comment.

Addressed review comments.

Re: tests. The tool's output is contingent on the results of HSA API calls, so
a test that always passes on every system with a different AMD GPU, and is
always skipped on systems without an AMD GPU, would not work.
I welcome suggestions on how to resolve this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+  // continue only if device type is GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector<std::string> *GPUs =
+  static_cast<std::vector<std::string> *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+fprintf(stderr, "Unable to initialize HSA\n");
+  }
+
+  std::vector<std::string> GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (unsigned I = 0; I < GPUs.size(); I++) {
+printf("%s\n", GPUs[I].c_str());
+  }
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -145,10 +145,16 @@
 const InputInfoList ,
 const ArgList ,
 const char *LinkingOutput) const {
+  const ToolChain  = getToolChain();
   assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
 
+  const toolchains::AMDGPUOpenMPToolChain  =
+  static_cast(TC);
   StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
-  assert(GPUArch.startswith("gfx") && "Unsupported sub arch");
+  if (GPUArch.empty()) {
+GPUArch = AMDGPUOpenMPTC.getSystemGPUArch();
+  }
+  assert(!GPUArch.empty() && "Must have an explicit GPU arch.");
 
   // Prefix for temporary file name.
   std::string Prefix;
@@ -187,6 +193,11 @@
   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
 
   StringRef 

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal planned changes to this revision.
pdhaliwal added a comment.

Working on tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-06 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: JonChesterfield, ronlieb, jdoerfert, ABataev, 
gregrodgers.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
mgorny, nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

This patch adds a new clang tool named amdgpu-arch, which uses
HSA to detect the installed AMDGPU and report its march.
This tool is built only if the system has HSA installed.

The value printed by amdgpu-arch is used to fill in -march when
the latter is not explicitly provided in -Xopenmp-target.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D99949

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,19 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+include_directories(${LIBOMP_INCLUDE_DIR})
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(INFO "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+  
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,76 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+namespace {
+
+class HSAAgentCollector {
+public:
+  static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+hsa_device_type_t DeviceType;
+hsa_status_t Status =
+hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);
+
+// continue only if device type is GPU
+if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+  return Status;
+}
+
+HSAAgentCollector *Self = static_cast<HSAAgentCollector *>(Data);
+char GPUName[64];
+Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+if (Status != HSA_STATUS_SUCCESS) {
+  return Status;
+}
+Self->GPUs.push_back(GPUName);
+return HSA_STATUS_SUCCESS;
+  }
+
+  int execute() {
+hsa_status_t Status = hsa_init();
+if (Status != HSA_STATUS_SUCCESS) {
+  fprintf(stderr, "Unable to initialize HSA\n");
+}
+
+Status = hsa_iterate_agents(HSAAgentCollector::iterateAgentsCallback, this);
+if (Status != HSA_STATUS_SUCCESS) {
+  fprintf(stderr, "Error in hsa_iterate_agents\n");
+  return 1;
+}
+
+for (unsigned I = 0; I < GPUs.size(); I++) {
+  printf("%s", GPUs[I].c_str());
+  if (I != GPUs.size() - 1) {
+printf(",");
+  }
+}
+if (GPUs.size() < 1)
+  return 1;
+
+hsa_shut_down();
+return 0;
+  }
+
+private:
+  std::vector<std::string> GPUs;
+};
+
+} // namespace
+
+int main() {
+  HSAAgentCollector Collector;
+  return Collector.execute();
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -15,7 +15,11 @@
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include 
 
 using namespace clang::driver;
 using 

[PATCH] D99402: [AMDGPU][OpenMP] Add /include to the search path

2021-03-29 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

The general problem seems a bit more involved. I am not that familiar with how
other architectures/systems handle the library/include path. The simplest
solution that I can propose right now is to generalise my revision to other
architectures for header lookup, and similarly for library lookup in the case
of OpenMP. If there is a better solution available, please let me know and I
will be happy to implement it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99402/new/

https://reviews.llvm.org/D99402

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99402: [AMDGPU][OpenMP] Add /include to the search path

2021-03-26 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

I was not aware that it was a general problem. I will check if I could get a 
general solution.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99402/new/

https://reviews.llvm.org/D99402

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99402: [AMDGPU][OpenMP] Add /include to the search path

2021-03-26 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: ronlieb, JonChesterfield, jdoerfert.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

Fixes the include error for omp.h, as the compiler was not looking in the
correct directory.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D99402

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -4,7 +4,7 @@
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
-// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"c"{{.*}}
+// CHECK: clang{{.*}}"-cc1" "-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-internal-isystem" 
"{{.*}}/bin/../include"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"ir"{{.*}}
 // CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" 
"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -244,6 +244,11 @@
 
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
+  const Driver  = HostTC.getDriver();
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(D.Dir);
+  llvm::sys::path::append(P, "..", "include");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
 }
 


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -4,7 +4,7 @@
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
-// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
+// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-internal-isystem" "{{.*}}/bin/../include"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
 // CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -244,6 +244,11 @@
 
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
+  const Driver  = HostTC.getDriver();
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(D.Dir);
+  llvm::sys::path::append(P, "..", "include");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
 }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-03-15 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGfc12a64ecc71: [OpenMP][AMDGPU] Skip backend and assemble 
phases for amdgcn (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96769/new/

https://reviews.llvm.org/D96769

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,3 +1,4 @@
+// REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 // RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
@@ -5,7 +6,7 @@
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -25,18 +26,35 @@
 // CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
 // CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
 // CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir
-// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
-// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
-// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
-// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
-// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
-// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
-// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
-// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
+// CHECK-PHASES: 9: linker, {8}, image, (device-openmp)
+// CHECK-PHASES: 10: offload, "device-openmp (amdgcn-amd-amdhsa)" {9}, image
+// CHECK-PHASES: 11: clang-offload-wrapper, {10}, ir, (host-openmp)
+// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
+// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
+// CHECK-PHASES: 14: linker, {4, 13}, image, (host-openmp)
 
 // handling of --libomptarget-amdgcn-bc-path
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
-// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
-// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
+// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -save-temps -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-PRINT-BINDINGS
+// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"],
+// 

[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-02-25 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 326392.
pdhaliwal added a comment.

Add extra llc step to produce assembly in the linker.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96769/new/

https://reviews.llvm.org/D96769

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,3 +1,4 @@
+// REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 // RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
@@ -5,7 +6,7 @@
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -25,18 +26,35 @@
 // CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
 // CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
 // CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir
-// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
-// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
-// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
-// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
-// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
-// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
-// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
-// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
+// CHECK-PHASES: 9: linker, {8}, image, (device-openmp)
+// CHECK-PHASES: 10: offload, "device-openmp (amdgcn-amd-amdhsa)" {9}, image
+// CHECK-PHASES: 11: clang-offload-wrapper, {10}, ir, (host-openmp)
+// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
+// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
+// CHECK-PHASES: 14: linker, {4, 13}, image, (host-openmp)
 
 // handling of --libomptarget-amdgcn-bc-path
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
-// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
-// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
+// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -save-temps -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-PRINT-BINDINGS
+// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"],
+// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang",{{.*}} output: "[[HOST_BC:.*]]"
+// CHECK-PRINT-BINDINGS: 

[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-02-25 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

So, neither emit-llvm-bc nor emit-llvm works well with save-temps. Therefore, I
feel the current approach is still valid. This does not impact nvptx or any
other target in any way, and I don't see how it could.

I see the valid concern regarding assembly output. This patch will indeed
prevent the device assembly output from being produced. I am working on that,
which requires adding an extra llc step in AMDGPUOpenMPToolChain.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96769/new/

https://reviews.llvm.org/D96769

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97273: OpenMP: Fix object clobbering issue when using save-temps

2021-02-24 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG99951aa68da3: OpenMP: Fix object clobbering issue when using 
save-temps (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97273/new/

https://reviews.llvm.org/D97273

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/openmp-offload-gpu.c


Index: clang/test/Driver/openmp-offload-gpu.c
===
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -310,3 +310,9 @@
 // RUN:   | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
 // OPENMP_NVPTX_WRAPPERS: clang{{.*}}"-cc1"{{.*}}"-triple" 
"nvptx64-nvidia-cuda"
 // OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
+
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:  -save-temps -no-canonical-prefixes -ccc-print-bindings %s -o 
openmp-offload-gpu 2>&1 \
+// RUN:   | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
+
+// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", 
"[[SAVE_TEMPS_INPUT1]]"]
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4675,11 +4675,12 @@
 /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
 !AtTopLevel);
 if (isa(JA)) {
-  OffloadingPrefix += "-wrapper";
   if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
 BaseInput = FinalOutput->getValue();
   else
 BaseInput = getDefaultImageName();
+  BaseInput =
+  C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
 }
 Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
  AtTopLevel, MultipleArchs,


Index: clang/test/Driver/openmp-offload-gpu.c
===
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -310,3 +310,9 @@
 // RUN:   | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
 // OPENMP_NVPTX_WRAPPERS: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
 // OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
+
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:  -save-temps -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
+// RUN:   | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
+
+// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"]
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4675,11 +4675,12 @@
 /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
 !AtTopLevel);
 if (isa(JA)) {
-  OffloadingPrefix += "-wrapper";
   if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
 BaseInput = FinalOutput->getValue();
   else
 BaseInput = getDefaultImageName();
+  BaseInput =
+  C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
 }
 Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
  AtTopLevel, MultipleArchs,
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97273: OpenMP: Fix object clobbering issue when using save-temps

2021-02-23 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

Here's a bit of background:
OffloadingPrefix was not being set properly in the dependent actions of
OffloadWrapperJobAction (which are backend [11] and assemble [12]). Since the
backend [11] and assemble [12] host-wrapper actions have the same logic as the
other host actions (3 & 4), they overwrite the files previously generated
by the host-only actions.

For example, the following names were generated for the output files previously
(the affected ones are marked in bold). (clang -fopenmp
-fopenmp-targets=nvptx64-nvidia-cuda -save-temps
-ccc-print-bindings helloworld.c -o helloworld)

1. "x86_64-unknown-linux-gnu" - "clang", inputs: ["helloworld.c"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.i"
2. "x86_64-unknown-linux-gnu" - "clang", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu.i"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.bc"
3. "x86_64-unknown-linux-gnu" - "clang", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu.bc"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.s"
4. "x86_64-unknown-linux-gnu" - "clang::as", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu.s"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.o"
5. "nvptx64-nvidia-cuda" - "clang", inputs: ["helloworld.c"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.i"
6. "nvptx64-nvidia-cuda" - "clang", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.i", 
"helloworld-host-x86_64-unknown-linux-gnu.bc"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.bc"
7. "nvptx64-nvidia-cuda" - "clang", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.bc"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.s"
8. "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.s"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.o"
9. "nvptx64-nvidia-cuda" - "NVPTX::OpenMPLinker", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.o"], output: 
"a.out-openmp-nvptx64-nvidia-cuda"
10. "x86_64-unknown-linux-gnu" - "offload wrapper", inputs: 
["a.out-openmp-nvptx64-nvidia-cuda"], output: 
"**helloworld-host-x86_64-unknown-linux-gnu-wrapper.bc**"
11. "x86_64-unknown-linux-gnu" - "clang", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu-wrapper.bc"], output: 
"**helloworld-host-x86_64-unknown-linux-gnu.s**"
12. "x86_64-unknown-linux-gnu" - "clang::as", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu.s"], output: 
"**helloworld-host-x86_64-unknown-linux-gnu.o**"
13. "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: 
["**helloworld-host-x86_64-unknown-linux-gnu.o", 
"helloworld-host-x86_64-unknown-linux-gnu.o**"], output: "helloworld"

And here are the names generated after this patch is applied:

1. "x86_64-unknown-linux-gnu" - "clang", inputs: ["helloworld.c"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.i"
2. "x86_64-unknown-linux-gnu" - "clang", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu.i"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.bc"
3. "x86_64-unknown-linux-gnu" - "clang", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu.bc"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.s"
4. "x86_64-unknown-linux-gnu" - "clang::as", inputs: 
["helloworld-host-x86_64-unknown-linux-gnu.s"], output: 
"helloworld-host-x86_64-unknown-linux-gnu.o"
5. "nvptx64-nvidia-cuda" - "clang", inputs: ["helloworld.c"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.i"
6. "nvptx64-nvidia-cuda" - "clang", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.i", 
"helloworld-host-x86_64-unknown-linux-gnu.bc"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.bc"
7. "nvptx64-nvidia-cuda" - "clang", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.bc"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.s"
8. "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.s"], output: 
"helloworld-openmp-nvptx64-nvidia-cuda.o"
9. "nvptx64-nvidia-cuda" - "NVPTX::OpenMPLinker", inputs: 
["helloworld-openmp-nvptx64-nvidia-cuda.o"], output: 
"a.out-openmp-nvptx64-nvidia-cuda"
10. "x86_64-unknown-linux-gnu" - "offload wrapper", inputs: 
["a.out-openmp-nvptx64-nvidia-cuda"], output: 
"**helloworld-wrapper-host-x86_64-unknown-linux-gnu.bc**"
11. "x86_64-unknown-linux-gnu" - "clang", inputs: 
["helloworld-wrapper-host-x86_64-unknown-linux-gnu.bc"], output: 
"**helloworld-wrapper-host-x86_64-unknown-linux-gnu.s**"
12. "x86_64-unknown-linux-gnu" - "clang::as", inputs: 
["helloworld-wrapper-host-x86_64-unknown-linux-gnu.s"], output: 
"**helloworld-wrapper-host-x86_64-unknown-linux-gnu.o**"
13. "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: 
["**helloworld-host-x86_64-unknown-linux-gnu.o**", 
"**helloworld-wrapper-host-x86_64-unknown-linux-gnu.o**"], output: "helloworld"

Making OffloadingPrefix different for 11 & 12 would require distinguishing
them from 3 & 4, which I don't think is possible. However, the 
changes to BaseInput in OffloadWrapperJobAction [10]  will also reflect in the 
dependent backend [11] and assemble [12] actions as BaseInput is present in 
InputInfo 

[PATCH] D97273: OpenMP: Fix object clobbering issue when using save-temps

2021-02-23 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: jdoerfert, JonChesterfield, ronlieb, tianshilei1992.
Herald added subscribers: guansong, yaxunl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

There are two preconditions for reproducing the issue:

1. Use the -save-temps option.
2. Provide the -o option with a name equal to the input file name without the
file extension, e.g. clang a.c -o a

With -o specified this way, the OffloadWrapperJobAction will produce
an object file with the same name as the host code object file. Due to
this clash, the OffloadWrapperJobAction overwrites the initial
host object file, which results in an lld error. This patch also fixes
the issue mentioned in D96769.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97273

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/openmp-offload-gpu.c


Index: clang/test/Driver/openmp-offload-gpu.c
===
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -300,3 +300,9 @@
 // RUN:   | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
 // OPENMP_NVPTX_WRAPPERS: clang{{.*}}"-cc1"{{.*}}"-triple" 
"nvptx64-nvidia-cuda"
 // OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
+
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:  -save-temps -no-canonical-prefixes -ccc-print-bindings %s -o 
openmp-offload-gpu 2>&1 \
+// RUN:   | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
+
+// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", 
"[[SAVE_TEMPS_INPUT1]]"]
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4683,11 +4683,12 @@
 /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
 !AtTopLevel);
 if (isa<OffloadWrapperJobAction>(JA)) {
-  OffloadingPrefix += "-wrapper";
   if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
 BaseInput = FinalOutput->getValue();
   else
 BaseInput = getDefaultImageName();
+  BaseInput =
+  C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
 }
 Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
  AtTopLevel, MultipleArchs,


Index: clang/test/Driver/openmp-offload-gpu.c
===
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -300,3 +300,9 @@
 // RUN:   | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
 // OPENMP_NVPTX_WRAPPERS: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
 // OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
+
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:  -save-temps -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
+// RUN:   | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
+
+// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"]
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4683,11 +4683,12 @@
 /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
 !AtTopLevel);
 if (isa<OffloadWrapperJobAction>(JA)) {
-  OffloadingPrefix += "-wrapper";
   if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
 BaseInput = FinalOutput->getValue();
   else
 BaseInput = getDefaultImageName();
+  BaseInput =
+  C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
 }
 Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
  AtTopLevel, MultipleArchs,
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-02-16 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

It is because of how addClangTargetOptions is invoked. In case of save-temps, 
it is being invoked for all the actions resulting in target cc1 call. That's 
why all these invocations have -emit-llvm-bc. I guess we need Action as an 
argument to addClangTargetOptions.

Also, it does not make sense to have assemble and backend actions for amdgcn,
as the linker depends directly on LLVM IR. They would also show up redundantly
in the -ccc-print-phases output.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96769/new/

https://reviews.llvm.org/D96769

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-02-16 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

-emit-llvm-bc does not correctly solve the problem. It works because the [input,
compile, assemble, backend] actions are collapsed into a single action by the
driver. This single command handles -emit-llvm-bc properly. But when -save-temps
is specified, this collapsing does not happen, which messes up the command line
flags of the jobs and hence the output; e.g., the preprocessor command also has
-emit-llvm-bc.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96769/new/

https://reviews.llvm.org/D96769

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-02-16 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added a comment.

This does fix save-temps, but only when -o is not specified. If -o is
specified, the host object file and the host-wrapper object file (second-to-last
phase) get the same name, which makes the linker fail. This does not seem to be
related to this patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96769/new/

https://reviews.llvm.org/D96769

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-02-16 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:3057
+  for (unsigned I = 0; I < ToolChains.size(); ++I) {
+Action *&A = OpenMPDeviceActions[I];
+// AMDGPU does not support linking of object files, so we skip

This logic is based on the assumption that the i-th item in OpenMPDeviceActions
corresponds to the i-th item in the ToolChains array. The size of both lists is
guaranteed to be the same by the assert on line 3035.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96769/new/

https://reviews.llvm.org/D96769

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96769: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn

2021-02-16 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: jdoerfert, JonChesterfield, ronlieb.
Herald added subscribers: kerbowa, guansong, t-tye, tpr, dstuttard, yaxunl, 
nhaehnle, jvesely, kzhuravl.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

AMDGCN does not support linking of object files, hence
backend and assemble actions are skipped to produce LLVM IR.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D96769

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,3 +1,4 @@
+// REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 // RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### 
--target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
@@ -5,7 +6,7 @@
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" 
"ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" 
"-emit-llvm-bc" 
"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" 
"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" 
"-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" 
"-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" 
"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -25,18 +26,29 @@
 // CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
 // CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
 // CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, 
"device-openmp (amdgcn-amd-amdhsa)" {7}, ir
-// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
-// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
-// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
-// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
-// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
-// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
-// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
-// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
+// CHECK-PHASES: 9: linker, {8}, image, (device-openmp)
+// CHECK-PHASES: 10: offload, "device-openmp (amdgcn-amd-amdhsa)" {9}, image
+// CHECK-PHASES: 11: clang-offload-wrapper, {10}, ir, (host-openmp)
+// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
+// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
+// CHECK-PHASES: 14: linker, {4, 13}, image, (host-openmp)
 
 // handling of --libomptarget-amdgcn-bc-path
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 
--libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
 %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
-// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" 
"-emit-llvm-bc" 
"-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" 
"-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
-// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" 
"-emit-llvm-bc" 
"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
+// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" 
"amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" 
"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings 
-save-temps -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 

[PATCH] D96248: [OpenMP][AMDGPU] Add support for linking libomptarget bitcode

2021-02-11 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG79401b43ce4e: [OpenMP][AMDGPU] Add support for linking 
libomptarget bitcode (authored by Pushpinder Singh 
pushpinderdhaliwa...@gmail.com, committed by pdhaliwal).

Changed prior to commit:
  https://reviews.llvm.org/D96248?vs=322638&id=323219#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96248/new/

https://reviews.llvm.org/D96248

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx906.bc
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,11 +1,11 @@
 // REQUIRES: amdgpu-registered-target
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -34,3 +34,9 @@
 // CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
 // CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
+// handling of --libomptarget-amdgcn-bc-path
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
+// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -749,56 +749,10 @@
 CudaVersionToString(CudaInstallation.version(;
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
-SmallVector LibraryPaths;
-// Add user defined library paths from LIBRARY_PATH.
-llvm::Optional LibPath =
-llvm::sys::Process::GetEnv("LIBRARY_PATH");
-if (LibPath) {
-  SmallVector Frags;
-  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
-  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
-  for (StringRef Path : Frags)
-LibraryPaths.emplace_back(Path.trim());
-}
-
-// Add path to lib / lib64 folder.
-SmallString<256> DefaultLibPath =
-llvm::sys::path::parent_path(getDriver().Dir);
-llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
-LibraryPaths.emplace_back(DefaultLibPath.c_str());
-
-// First check whether user specifies bc library
-if (const Arg *A =
-DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_bc_path_EQ)) {
-  std::string 

[PATCH] D96248: [OpenMP][AMDGPU] Add support for linking libomptarget bitcode

2021-02-10 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 322638.
pdhaliwal marked an inline comment as done.
pdhaliwal added a comment.

I have removed libomptarget-device-bc-path and have added an amdgcn one. For the
diagnostic, instead of having one per architecture, I have reused the same one
and added a second parameter to specify the arch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96248/new/

https://reviews.llvm.org/D96248

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx906.bc
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,11 +1,11 @@
 // REQUIRES: amdgpu-registered-target
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -34,3 +34,9 @@
 // CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
 // CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
+// handling of --libomptarget-amdgcn-bc-path
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
+// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -749,56 +749,10 @@
 CudaVersionToString(CudaInstallation.version(;
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
-SmallVector LibraryPaths;
-// Add user defined library paths from LIBRARY_PATH.
-llvm::Optional LibPath =
-llvm::sys::Process::GetEnv("LIBRARY_PATH");
-if (LibPath) {
-  SmallVector Frags;
-  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
-  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
-  for (StringRef Path : Frags)
-LibraryPaths.emplace_back(Path.trim());
-}
-
-// Add path to lib / lib64 folder.
-SmallString<256> DefaultLibPath =
-llvm::sys::path::parent_path(getDriver().Dir);
-llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
-LibraryPaths.emplace_back(DefaultLibPath.c_str());
-
-// First check whether user specifies bc library
-if (const Arg *A =
-DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_bc_path_EQ)) {
-  std::string LibOmpTargetName(A->getValue());
-  if 

[PATCH] D96248: [OpenMP][AMDGPU] Add support for linking libomptarget bitcode

2021-02-08 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 322298.
pdhaliwal added a comment.

- Added check for nogpulib
- Fixed diagnostic message


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96248/new/

https://reviews.llvm.org/D96248

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx906.bc
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/openmp-offload-gpu.c

Index: clang/test/Driver/openmp-offload-gpu.c
===
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -177,7 +177,7 @@
 // RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB-WARN %s
 
-// CHK-BCLIB-WARN: No library 'libomptarget-nvptx-cuda_80-sm_20.bc' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-nvptx-bc-path to specify nvptx bitcode library.
+// CHK-BCLIB-WARN: No library 'libomptarget-nvptx-cuda_80-sm_20.bc' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-device-bc-path to specify device bitcode library.
 
 /// ###
 
Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,11 +1,11 @@
 // REQUIRES: amdgpu-registered-target
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -34,3 +34,9 @@
 // CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
 // CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
+// handling of --libomptarget-device-bc-path
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-device-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
+// CHECK-NOGPULIB-NOT: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx803.bc"{{.*}}
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -749,56 +749,9 @@
 CudaVersionToString(CudaInstallation.version(;
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
-SmallVector LibraryPaths;
-// Add user defined library paths from LIBRARY_PATH.
-llvm::Optional LibPath =
-llvm::sys::Process::GetEnv("LIBRARY_PATH");
-if (LibPath) {
-  SmallVector Frags;
-  const char EnvPathSeparatorStr[] = 

[PATCH] D96248: [OpenMP][AMDGPU] Add support for linking libomptarget bitcode

2021-02-08 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 322090.
pdhaliwal added a comment.

Addressed review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96248/new/

https://reviews.llvm.org/D96248

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx906.bc
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,11 +1,11 @@
 // REQUIRES: amdgpu-registered-target
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -34,3 +34,7 @@
 // CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
 // CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
+// handling of --libomptarget-device-bc-path
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-device-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -749,56 +749,9 @@
 CudaVersionToString(CudaInstallation.version(;
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
-SmallVector LibraryPaths;
-// Add user defined library paths from LIBRARY_PATH.
-llvm::Optional LibPath =
-llvm::sys::Process::GetEnv("LIBRARY_PATH");
-if (LibPath) {
-  SmallVector Frags;
-  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
-  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
-  for (StringRef Path : Frags)
-LibraryPaths.emplace_back(Path.trim());
-}
-
-// Add path to lib / lib64 folder.
-SmallString<256> DefaultLibPath =
-llvm::sys::path::parent_path(getDriver().Dir);
-llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
-LibraryPaths.emplace_back(DefaultLibPath.c_str());
-
-// First check whether user specifies bc library
-if (const Arg *A =
-DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_bc_path_EQ)) {
-  std::string LibOmpTargetName(A->getValue());
-  if (llvm::sys::fs::exists(LibOmpTargetName)) {
-CC1Args.push_back("-mlink-builtin-bitcode");
-CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetName));
-  } else {
-getDriver().Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
-<< LibOmpTargetName;
-  }
-} else {
-  bool FoundBCLibrary = false;
-
-  std::string LibOmpTargetName = "libomptarget-nvptx-cuda_" +
- CudaVersionStr + "-" + GpuArch.str() +
- ".bc";
-
-  for (StringRef LibraryPath : LibraryPaths) {
-SmallString<128> LibOmpTargetFile(LibraryPath);
-llvm::sys::path::append(LibOmpTargetFile, 

[PATCH] D96248: [OpenMP][AMDGPU] Add support for linking libomptarget bitcode

2021-02-08 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal updated this revision to Diff 322052.
pdhaliwal added a comment.

Accidentally missed some changes:

- Fix openmp-offload.c test failure
- Fix amdgpu-openmp-toolchain.c test failure


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96248/new/

https://reviews.llvm.org/D96248

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx906.bc
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,11 +1,11 @@
 // REQUIRES: amdgpu-registered-target
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -34,3 +34,7 @@
 // CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
 // CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
+// handling of --libomptarget-device-bc-path
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-device-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
+
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -749,56 +749,9 @@
 CudaVersionToString(CudaInstallation.version())));
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
-SmallVector LibraryPaths;
-// Add user defined library paths from LIBRARY_PATH.
-llvm::Optional LibPath =
-llvm::sys::Process::GetEnv("LIBRARY_PATH");
-if (LibPath) {
-  SmallVector Frags;
-  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
-  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
-  for (StringRef Path : Frags)
-LibraryPaths.emplace_back(Path.trim());
-}
-
-// Add path to lib / lib64 folder.
-SmallString<256> DefaultLibPath =
-llvm::sys::path::parent_path(getDriver().Dir);
-llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
-LibraryPaths.emplace_back(DefaultLibPath.c_str());
-
-// First check whether user specifies bc library
-if (const Arg *A =
-DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_bc_path_EQ)) {
-  std::string LibOmpTargetName(A->getValue());
-  if (llvm::sys::fs::exists(LibOmpTargetName)) {
-CC1Args.push_back("-mlink-builtin-bitcode");
-CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetName));
-  } else {
-getDriver().Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
-<< LibOmpTargetName;
-  }
-} else {
-  bool FoundBCLibrary = false;
-
-  std::string LibOmpTargetName = "libomptarget-nvptx-cuda_" +
- CudaVersionStr + "-" + GpuArch.str() +
- ".bc";
-
-  for (StringRef LibraryPath : LibraryPaths) {
-SmallString<128> 

[PATCH] D96248: [OpenMP][AMDGPU] Add support for linking libomptarget bitcode

2021-02-08 Thread Pushpinder Singh via Phabricator via cfe-commits
pdhaliwal created this revision.
pdhaliwal added reviewers: jdoerfert, JonChesterfield, ronlieb, saiislam, 
ABataev.
Herald added subscribers: dang, kerbowa, guansong, t-tye, tpr, dstuttard, 
yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added a reviewer: jansvoboda11.
pdhaliwal requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

This patch extracts the existing CUDA logic for locating the libomptarget
bitcode library into a common method, so that the AMDGPU OpenMP toolchain
can reuse it.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D96248

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc
  clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx906.bc
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -1,11 +1,11 @@
 // REQUIRES: amdgpu-registered-target
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
+// RUN:   env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}}
 // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
 // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
 // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
@@ -34,3 +34,6 @@
 // CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
 // CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
+// handling of --libomptarget-amdgcn-bc-path
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgcn-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
+// CHECK-LIBOMPTARGET: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc"{{.*}}
\ No newline at end of file
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -749,56 +749,8 @@
 CudaVersionToString(CudaInstallation.version())));
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
-SmallVector LibraryPaths;
-// Add user defined library paths from LIBRARY_PATH.
-llvm::Optional LibPath =
-llvm::sys::Process::GetEnv("LIBRARY_PATH");
-if (LibPath) {
-  SmallVector Frags;
-  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
-  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
-  for (StringRef Path : Frags)
-LibraryPaths.emplace_back(Path.trim());
-}
-
-// Add path to lib / lib64 folder.
-SmallString<256> DefaultLibPath =
-llvm::sys::path::parent_path(getDriver().Dir);
-llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
-LibraryPaths.emplace_back(DefaultLibPath.c_str());
-
-// First check whether user specifies bc library
-if (const Arg *A =
-DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_bc_path_EQ)) {
-  std::string LibOmpTargetName(A->getValue());
-  if (llvm::sys::fs::exists(LibOmpTargetName)) {
-CC1Args.push_back("-mlink-builtin-bitcode");
-CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetName));
-  } else {
-getDriver().Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
-<< LibOmpTargetName;
-  }
-} else {
-  bool FoundBCLibrary = false;

  1   2   >