[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-25 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea updated this revision to Diff 196619.
gtbercea added a comment.

- Use macros.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907

Files:
  include/clang/Driver/ToolChain.h
  lib/Driver/ToolChains/Clang.cpp
  lib/Driver/ToolChains/Cuda.cpp
  lib/Driver/ToolChains/Cuda.h
  lib/Headers/CMakeLists.txt
  lib/Headers/__clang_openmp_math.h

Index: lib/Headers/__clang_openmp_math.h
===
--- /dev/null
+++ lib/Headers/__clang_openmp_math.h
@@ -0,0 +1,95 @@
+/*=== __clang_openmp_math.h - Target OpenMP math support ---===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===---===
+ */
+
+#ifndef __CLANG_OPENMP_MATH_H__
+#define __CLANG_OPENMP_MATH_H__
+
+#pragma omp declare target
+
+// Declarations of function in libomptarget
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// POW
+float __kmpc_powf(float, float);
+double __kmpc_pow(double, double);
+long double __kmpc_powl(long double, long double);
+
+// LOG
+double __kmpc_log(double);
+float __kmpc_logf(float);
+double __kmpc_log10(double);
+float __kmpc_log10f(float);
+double __kmpc_log1p(double);
+float __kmpc_log1pf(float);
+double __kmpc_log2(double);
+float __kmpc_log2f(float);
+double __kmpc_logb(double);
+float __kmpc_logbf(float);
+
+// SIN
+float __kmpc_sinf(float);
+double __kmpc_sin(double);
+long double __kmpc_sinl(long double);
+
+// COS
+float __kmpc_cosf(float);
+double __kmpc_cos(double);
+long double __kmpc_cosl(long double);
+
+#if defined(__cplusplus)
+}
+#endif
+
+// Single argument functions
+#define __OPENMP_MATH_FUNC_1(__ty, __fn, __kmpc_fn)\
+  __attribute__((always_inline, used)) static __ty \
+  __fn(__ty __x) { \
+return __kmpc_fn(__x); \
+  }
+
+// Double argument functions
+#define __OPENMP_MATH_FUNC_2(__ty, __fn, __kmpc_fn)\
+  __attribute__((always_inline, used)) static __ty \
+  __fn(__ty __x, __ty __y) {   \
+return __kmpc_fn(__x, __y);\
+  }
+
+// POW
+__OPENMP_MATH_FUNC_2(float, powf, __kmpc_powf);
+__OPENMP_MATH_FUNC_2(double, pow, __kmpc_pow);
+__OPENMP_MATH_FUNC_2(long double, powl, __kmpc_powl);
+
+// LOG
+__OPENMP_MATH_FUNC_1(double, log, __kmpc_log);
+__OPENMP_MATH_FUNC_1(float, logf, __kmpc_logf);
+__OPENMP_MATH_FUNC_1(double, log10, __kmpc_log10);
+__OPENMP_MATH_FUNC_1(float, log10f, __kmpc_log10f);
+__OPENMP_MATH_FUNC_1(double, log1p, __kmpc_log1p);
+__OPENMP_MATH_FUNC_1(float, log1pf, __kmpc_log1pf);
+__OPENMP_MATH_FUNC_1(double, log2, __kmpc_log2);
+__OPENMP_MATH_FUNC_1(float, log2f, __kmpc_log2f);
+__OPENMP_MATH_FUNC_1(double, logb, __kmpc_logb);
+__OPENMP_MATH_FUNC_1(float, logbf, __kmpc_logbf);
+
+// SIN
+__OPENMP_MATH_FUNC_1(float, sinf, __kmpc_sinf);
+__OPENMP_MATH_FUNC_1(double, sin, __kmpc_sin);
+__OPENMP_MATH_FUNC_1(long double, sinl, __kmpc_sinl);
+
+// COS
+__OPENMP_MATH_FUNC_1(float, cosf, __kmpc_cosf);
+__OPENMP_MATH_FUNC_1(double, cos, __kmpc_cos);
+__OPENMP_MATH_FUNC_1(long double, cosl, __kmpc_cosl);
+
+#pragma omp end declare target
+
+#endif
+
Index: lib/Headers/CMakeLists.txt
===
--- lib/Headers/CMakeLists.txt
+++ lib/Headers/CMakeLists.txt
@@ -31,6 +31,7 @@
   avxintrin.h
   bmi2intrin.h
   bmiintrin.h
+  __clang_openmp_math.h
   __clang_cuda_builtin_vars.h
   __clang_cuda_cmath.h
   __clang_cuda_complex_builtins.h
Index: lib/Driver/ToolChains/Cuda.h
===
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -48,6 +48,9 @@
   void AddCudaIncludeArgs(const llvm::opt::ArgList ,
   llvm::opt::ArgStringList ) const;
 
+  void AddMathDeviceFunctions(const llvm::opt::ArgList ,
+  llvm::opt::ArgStringList ) const;
+
   /// Emit an error if Version does not support the given Arch.
   ///
   /// If either Version or Arch is unknown, does not emit an error.  Emits at
@@ -165,6 +168,9 @@
   void AddCudaIncludeArgs(const llvm::opt::ArgList ,
   llvm::opt::ArgStringList ) const override;
 
+  void AddMathDeviceFunctions(const llvm::opt::ArgList ,
+  llvm::opt::ArgStringList ) const override;
+
   void addClangWarningOptions(llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList ) const override;
   void
Index: 

[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-24 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added a subscriber: gregrodgers.
gtbercea added a comment.

@gregrodgers


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-24 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added a comment.

In D60907#1473406, @Hahnfeld wrote:

> So the scheme is: `pow` is defined in `__clang_openmp_math.h` to call 
> `__kmpc_pow`. This lives in `libomptarget-nvptx` (both bc and static lib) and 
> just calls `pow` which works because `nvcc` and Clang in CUDA mode make sure 
> that the call gets routed into `libdevice`?
>
> Did you test that something like `pow(d, 2)` is optimized by LLVM to `d * d`? 
> There's a pass doing so (can't recall the name) and from my previous attempts 
> it didn't work well if you hid the function name instead of the known `pow` 
> one.


The transformation was blocked by a check in optimizePow(), which was 
preventing pow(x,2) from becoming x*x. By adding the pow functions to the TLI, 
the transformation now applies. This has now been fixed. SQRT is eliminated as 
usual; there is no change for that.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-20 Thread Jonas Hahnfeld via Phabricator via cfe-commits
Hahnfeld added a comment.

So the scheme is: `pow` is defined in `__clang_openmp_math.h` to call 
`__kmpc_pow`. This lives in `libomptarget-nvptx` (both bc and static lib) and 
just calls `pow` which works because `nvcc` and Clang in CUDA mode make sure 
that the call gets routed into `libdevice`?

Did you test that something like `pow(d, 2)` is optimized by LLVM to `d * d`? 
There's a pass doing so (can't recall the name) and from my previous attempts 
it didn't work well if you hid the function name instead of the known `pow` one.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea updated this revision to Diff 195915.
gtbercea edited the summary of this revision.
gtbercea added a comment.

- Address comments.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907

Files:
  include/clang/Driver/ToolChain.h
  lib/Driver/ToolChains/Clang.cpp
  lib/Driver/ToolChains/Cuda.cpp
  lib/Driver/ToolChains/Cuda.h
  lib/Headers/CMakeLists.txt
  lib/Headers/__clang_openmp_math.h

Index: lib/Headers/__clang_openmp_math.h
===
--- /dev/null
+++ lib/Headers/__clang_openmp_math.h
@@ -0,0 +1,65 @@
+/*=== __clang_openmp_math.h - Target OpenMP math support ---===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===---===
+ */
+
+#ifndef __CLANG_OPENMP_MATH_H__
+#define __CLANG_OPENMP_MATH_H__
+
+#pragma omp declare target
+
+// Declarations of function in libomptarget
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// POW
+float __kmpc_powf(float, float);
+double __kmpc_pow(double, double);
+long double __kmpc_powl(long double, long double);
+
+// SIN
+float __kmpc_sinf(float);
+double __kmpc_sin(double);
+long double __kmpc_sinl(long double);
+
+#if defined(__cplusplus)
+}
+#endif
+
+// POW
+__attribute__((always_inline, used)) static float powf(float a, float b) {
+  return __kmpc_powf(a, b);
+}
+
+__attribute__((always_inline, used)) static double pow(double a, double b) {
+  return __kmpc_pow(a, b);
+}
+
+__attribute__((always_inline, used)) static long double powl(
+	long double a, long double b) {
+  return __kmpc_powl(a, b);
+}
+
+// SIN
+__attribute__((always_inline, used)) static float sinf(float a) {
+  return __kmpc_sinf(a);
+}
+
+__attribute__((always_inline, used)) static double sin(double a) {
+  return __kmpc_sin(a);
+}
+
+__attribute__((always_inline, used)) static long double sinl(
+	long double a) {
+  return __kmpc_sinl(a);
+}
+
+#pragma omp end declare target
+
+#endif
+
Index: lib/Headers/CMakeLists.txt
===
--- lib/Headers/CMakeLists.txt
+++ lib/Headers/CMakeLists.txt
@@ -31,6 +31,7 @@
   avxintrin.h
   bmi2intrin.h
   bmiintrin.h
+  __clang_openmp_math.h
   __clang_cuda_builtin_vars.h
   __clang_cuda_cmath.h
   __clang_cuda_complex_builtins.h
Index: lib/Driver/ToolChains/Cuda.h
===
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -48,6 +48,9 @@
   void AddCudaIncludeArgs(const llvm::opt::ArgList ,
   llvm::opt::ArgStringList ) const;
 
+  void AddMathDeviceFunctions(const llvm::opt::ArgList ,
+  llvm::opt::ArgStringList ) const;
+
   /// Emit an error if Version does not support the given Arch.
   ///
   /// If either Version or Arch is unknown, does not emit an error.  Emits at
@@ -165,6 +168,9 @@
   void AddCudaIncludeArgs(const llvm::opt::ArgList ,
   llvm::opt::ArgStringList ) const override;
 
+  void AddMathDeviceFunctions(const llvm::opt::ArgList ,
+  llvm::opt::ArgStringList ) const override;
+
   void addClangWarningOptions(llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList ) const override;
   void
Index: lib/Driver/ToolChains/Cuda.cpp
===
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -255,6 +255,16 @@
   CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
 }
 
+void CudaInstallationDetector::AddMathDeviceFunctions(
+const ArgList , ArgStringList ) const {
+  CC1Args.push_back("-internal-isystem");
+  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
+  CC1Args.push_back("-include");
+  CC1Args.push_back("__clang_openmp_math.h");
+  CC1Args.push_back("-I");
+  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
+}
+
 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
 CudaArch Arch) const {
   if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
@@ -898,6 +908,11 @@
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
 
+void CudaToolChain::AddMathDeviceFunctions(
+const ArgList , ArgStringList ) const {
+  CudaInstallation.AddMathDeviceFunctions(DriverArgs, CC1Args);
+}
+
 llvm::opt::DerivedArgList *
 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList ,
  StringRef BoundArch,
Index: lib/Driver/ToolChains/Clang.cpp
===
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -1150,6 +1150,14 @@
   if 

[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

To follow up on my comment why this is NVPTX specific:

Is there a reason why this has to happen in the Cuda ToolChain part?
I would have expected us to add declarations similar to the ones provided in 
`__clang_openmp_math.h` whenever we may compile for a target.
So, if we have any OpenMP target related code in the TU, we add the header 
`__clang_openmp_target_math.h` which defines "common" math functions as you did 
in `__clang_openmp_math.h` (without the NVPTX guard). The runtime will then 
implement `__kmpc_` as it sees fit.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: include/clang/Driver/ToolChain.h:575
 
+  /// Add arguments to use system-specific CUDA includes.
+  virtual void AddMathDeviceFunctions(const llvm::opt::ArgList ,

Copy & Paste comment



Comment at: lib/Headers/__clang_openmp_math.h:5
+
+#ifdef __NVPTX__
+#pragma omp declare target

Why is this NVPTX specific?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added inline comments.



Comment at: lib/Headers/__clang_openmp_math.h:2
+
+#ifndef __CLANG_OMP_CMATH_H__
+#define __CLANG_OMP_CMATH_H__

Why `__CLANG_OMP_CMATH_H__`? Your file is `..._math.h`, not `..._cmath.h`. 
Plus, it seems to me that you're missing the standard header for the file.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added inline comments.



Comment at: lib/Headers/__clang_openmp_math.h:14
+double __kmpc_pow(double, double);
+double __kmpc_sin(double);
+

Also, add versions for float and long double.



Comment at: lib/Headers/__clang_openmp_math.h:21
+// Define existing function to call kmpc functions.
+__attribute__((always_inline, used)) static double pow(double a, double b) {
+  return __kmpc_pow(a, b);

Add `powf(float)`, `powl(long double)`, `sinf(float)`, `sinl(long double)`


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea created this revision.
gtbercea added reviewers: ABataev, hfinkel, caomhin.
Herald added subscribers: cfe-commits, jdoerfert, guansong, mgorny.
Herald added a project: clang.
gtbercea added a reviewer: tra.
gtbercea added parent revisions: D60906: [OpenMP][libomptarget][WIP] Add math 
functions support in OpenMP offloading, D60905: [OpenMP][LLVM][WIP] Add math 
functions support to OpenMP.
gtbercea edited the summary of this revision.

This patch adds an OpenMP specific math functions header to the lib/Headers 
folder and ensures it is passed to Clang.

Note:
This is an example of how support for math functions could be implemented. 
Before expanding this to include other math functions please let me know if you 
have any comments, concerns or proposed changes.


Repository:
  rC Clang

https://reviews.llvm.org/D60907

Files:
  include/clang/Driver/ToolChain.h
  lib/Driver/ToolChains/Clang.cpp
  lib/Driver/ToolChains/Cuda.cpp
  lib/Driver/ToolChains/Cuda.h
  lib/Headers/CMakeLists.txt
  lib/Headers/__clang_openmp_math.h

Index: lib/Headers/__clang_openmp_math.h
===
--- /dev/null
+++ lib/Headers/__clang_openmp_math.h
@@ -0,0 +1,33 @@
+
+#ifndef __CLANG_OMP_CMATH_H__
+#define __CLANG_OMP_CMATH_H__
+
+#ifdef __NVPTX__
+#pragma omp declare target
+
+// Declarations of function in libomptarget
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+double __kmpc_pow(double, double);
+double __kmpc_sin(double);
+
+#if defined(__cplusplus)
+}
+#endif
+
+// Define existing function to call kmpc functions.
+__attribute__((always_inline, used)) static double pow(double a, double b) {
+  return __kmpc_pow(a, b);
+}
+
+__attribute__((always_inline, used)) static double sin(double a) {
+  return __kmpc_sin(a);
+}
+
+#pragma omp end declare target
+#endif
+
+#endif
+
Index: lib/Headers/CMakeLists.txt
===
--- lib/Headers/CMakeLists.txt
+++ lib/Headers/CMakeLists.txt
@@ -31,6 +31,7 @@
   avxintrin.h
   bmi2intrin.h
   bmiintrin.h
+  __clang_openmp_math.h
   __clang_cuda_builtin_vars.h
   __clang_cuda_cmath.h
   __clang_cuda_complex_builtins.h
Index: lib/Driver/ToolChains/Cuda.h
===
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -48,6 +48,9 @@
   void AddCudaIncludeArgs(const llvm::opt::ArgList ,
   llvm::opt::ArgStringList ) const;
 
+  void AddMathDeviceFunctions(const llvm::opt::ArgList ,
+  llvm::opt::ArgStringList ) const;
+
   /// Emit an error if Version does not support the given Arch.
   ///
   /// If either Version or Arch is unknown, does not emit an error.  Emits at
@@ -165,6 +168,9 @@
   void AddCudaIncludeArgs(const llvm::opt::ArgList ,
   llvm::opt::ArgStringList ) const override;
 
+  void AddMathDeviceFunctions(const llvm::opt::ArgList ,
+  llvm::opt::ArgStringList ) const override;
+
   void addClangWarningOptions(llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList ) const override;
   void
Index: lib/Driver/ToolChains/Cuda.cpp
===
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -255,6 +255,16 @@
   CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
 }
 
+void CudaInstallationDetector::AddMathDeviceFunctions(
+const ArgList , ArgStringList ) const {
+  CC1Args.push_back("-internal-isystem");
+  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
+  CC1Args.push_back("-include");
+  CC1Args.push_back("__clang_openmp_math.h");
+  CC1Args.push_back("-I");
+  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
+}
+
 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
 CudaArch Arch) const {
   if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
@@ -898,6 +908,11 @@
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
 
+void CudaToolChain::AddMathDeviceFunctions(
+const ArgList , ArgStringList ) const {
+  CudaInstallation.AddMathDeviceFunctions(DriverArgs, CC1Args);
+}
+
 llvm::opt::DerivedArgList *
 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList ,
  StringRef BoundArch,
Index: lib/Driver/ToolChains/Clang.cpp
===
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -1150,6 +1150,14 @@
   if (JA.isOffloading(Action::OFK_Cuda))
 getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
 
+  // If we are offloading to a target via OpenMP and this target happens
+  // to be an NVIDIA GPU then we need to include the CUDA runtime wrapper
+  // to ensure the correct math functions are called in the offloaded
+  // code.
+  if (JA.isDeviceOffloading(Action::OFK_OpenMP)