Re: [PATCH] D23627: [CUDA] Improve handling of math functions.
This revision was automatically updated to reflect the committed changes. Closed by commit rL279140: [CUDA] Improve handling of math functions. (authored by jlebar). Changed prior to commit: https://reviews.llvm.org/D23627?vs=68427&id=68600#toc Repository: rL LLVM https://reviews.llvm.org/D23627 Files: cfe/trunk/lib/Headers/__clang_cuda_cmath.h cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h Index: cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h === --- cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h +++ cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h @@ -140,6 +140,7 @@ __DEVICE__ long lrint(float); __DEVICE__ long lround(double); __DEVICE__ long lround(float); +__DEVICE__ long long llround(float); // No llround(double). __DEVICE__ double modf(double, double *); __DEVICE__ float modf(float, float *); __DEVICE__ double nan(const char *); @@ -149,7 +150,8 @@ __DEVICE__ double nextafter(double, double); __DEVICE__ float nextafter(float, float); __DEVICE__ double nexttoward(double, double); -__DEVICE__ float nexttoward(float, float); +__DEVICE__ float nexttoward(float, double); +__DEVICE__ float nexttowardf(float, double); __DEVICE__ double pow(double, double); __DEVICE__ double pow(double, int); __DEVICE__ float pow(float, float); @@ -235,6 +237,7 @@ using ::logb; using ::lrint; using ::lround; +using ::llround; using ::modf; using ::nan; using ::nanf; Index: cfe/trunk/lib/Headers/__clang_cuda_cmath.h === --- cfe/trunk/lib/Headers/__clang_cuda_cmath.h +++ cfe/trunk/lib/Headers/__clang_cuda_cmath.h @@ -26,13 +26,15 @@ #error "This file is for CUDA compilation only." #endif +#include + // CUDA lets us use various std math functions on the device side. This file // works in concert with __clang_cuda_math_forward_declares.h to make this work. // // Specifically, the forward-declares header declares __device__ overloads for // these functions in the global namespace, then pulls them into namespace std // with 'using' statements. Then this file implements those functions, after -// the implementations have been pulled in. +// their implementations have been pulled in. // // It's important that we declare the functions in the global namespace and pull // them into namespace std with using statements, as opposed to simply declaring @@ -120,12 +122,15 @@ __DEVICE__ float log(float __x) { return ::logf(__x); } __DEVICE__ float log10(float __x) { return ::log10f(__x); } __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } -__DEVICE__ float nexttoward(float __from, float __to) { +__DEVICE__ float nexttoward(float __from, double __to) { return __builtin_nexttowardf(__from, __to); } __DEVICE__ double nexttoward(double __from, double __to) { return __builtin_nexttoward(__from, __to); } +__DEVICE__ float nexttowardf(float __from, double __to) { + return __builtin_nexttowardf(__from, __to); +} __DEVICE__ float pow(float __base, float __exp) { return ::powf(__base, __exp); } @@ -143,6 +148,280 @@ __DEVICE__ float tan(float __x) { return ::tanf(__x); } __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } +// Now we've defined everything we promised we'd define in +// __clang_cuda_math_forward_declares.h. We need to do two additional things to +// fix up our math functions. +// +// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define +//only sin(float) and sin(double), which means that e.g. sin(0) is +//ambiguous. +// +// 2) Pull the __device__ overloads of "foobarf" math functions into namespace +//std. These are defined in the CUDA headers in the global namespace, +//independent of everything else we've done here. + +// We can't use std::enable_if, because we want to be pre-C++11 compatible. But +// we go ahead and unconditionally define functions that are only available when +// compiling for C++11 to match the behavior of the CUDA headers. +template +struct __clang_cuda_enable_if {}; + +template struct __clang_cuda_enable_if { + typedef __T type; +}; + +// Defines an overload of __fn that accepts one integral argument, calls +// __fn((double)x), and returns __retty. +#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ + template \ + __DEVICE__ \ + typename __clang_cuda_enable_if::is_integer,\ + __retty>::type \ + __fn(__T __x) { \ +return ::__fn((double)__x);\ + } + +// Defines an overload of __fn that accepts one two arithmetic arguments, calls +// __fn((double)x, (double)y), and
Re: [PATCH] D23627: [CUDA] Improve handling of math functions.
jlebar added a comment. These changes have always been kind of scary. tra tested this against Thrust all combinations of CUDA 7.0/7.5, c++98/11, libc++/libstdc++{4.8.5/4.9.3,5.3.0}. So we should be good here. I hope. https://reviews.llvm.org/D23627 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23627: [CUDA] Improve handling of math functions.
tra accepted this revision. tra added a comment. This revision is now accepted and ready to land. LGTM, but we may want someone familiar with math library to take a look. Comment at: clang/lib/Headers/__clang_cuda_cmath.h:125-133 @@ -122,8 +124,11 @@ __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } -__DEVICE__ float nexttoward(float __from, float __to) { +__DEVICE__ float nexttoward(float __from, double __to) { return __builtin_nexttowardf(__from, __to); } __DEVICE__ double nexttoward(double __from, double __to) { return __builtin_nexttoward(__from, __to); } +__DEVICE__ float nexttowardf(float __from, double __to) { + return __builtin_nexttowardf(__from, __to); +} __DEVICE__ float pow(float __base, float __exp) { jlebar wrote: > tra wrote: > > You've got two identical `nexttoward(float, double)` now. > > Perhaps first one was supposed to remain `nexttoward(float, float)` ? > > > > > It's hard to see, but one is nexttowardf. Indeed, I've missed that. https://reviews.llvm.org/D23627 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23627: [CUDA] Improve handling of math functions.
jlebar added inline comments. Comment at: clang/lib/Headers/__clang_cuda_cmath.h:125-133 @@ -122,8 +124,11 @@ __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } -__DEVICE__ float nexttoward(float __from, float __to) { +__DEVICE__ float nexttoward(float __from, double __to) { return __builtin_nexttowardf(__from, __to); } __DEVICE__ double nexttoward(double __from, double __to) { return __builtin_nexttoward(__from, __to); } +__DEVICE__ float nexttowardf(float __from, double __to) { + return __builtin_nexttowardf(__from, __to); +} __DEVICE__ float pow(float __base, float __exp) { tra wrote: > You've got two identical `nexttoward(float, double)` now. > Perhaps first one was supposed to remain `nexttoward(float, float)` ? > > It's hard to see, but one is nexttowardf. Comment at: clang/lib/Headers/__clang_cuda_cmath.h:184-197 @@ +183,16 @@ + +// Defines an overload of __fn that accepts one two arithmetic arguments, calls +// __fn((double)x, (double)y), and returns a double. +// +// Note this is different from OVERLOAD_1, which generates an overload that +// accepts only *integral* arguments. +#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ + template \ + __DEVICE__ typename __clang_cuda_enable_if< \ + std::numeric_limits<__T1>::is_specialized && \ + std::numeric_limits<__T2>::is_specialized, \ + __retty>::type \ + __fn(__T1 __x, __T2 __y) { \ +return __fn((double)__x, (double)__y); \ + } + tra wrote: > `is_specialized` will be true for `long double` args and we'll instantiate > the function. Can we/should we produce an error instead? I think it's OK. Or at least, long double is kind of screwed up at the moment. Sometimes we pick `__host__` overloads, sometimes we pick `__device__` overloads; I made no effort to make it correct. I'm much more bullish on making use of long double a compile error as a way to solve these problems. https://reviews.llvm.org/D23627 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23627: [CUDA] Improve handling of math functions.
tra added inline comments. Comment at: clang/lib/Headers/__clang_cuda_cmath.h:125-133 @@ -122,8 +124,11 @@ __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } -__DEVICE__ float nexttoward(float __from, float __to) { +__DEVICE__ float nexttoward(float __from, double __to) { return __builtin_nexttowardf(__from, __to); } __DEVICE__ double nexttoward(double __from, double __to) { return __builtin_nexttoward(__from, __to); } +__DEVICE__ float nexttowardf(float __from, double __to) { + return __builtin_nexttowardf(__from, __to); +} __DEVICE__ float pow(float __base, float __exp) { You've got two identical `nexttoward(float, double)` now. Perhaps first one was supposed to remain `nexttoward(float, float)` ? Comment at: clang/lib/Headers/__clang_cuda_cmath.h:184-197 @@ +183,16 @@ + +// Defines an overload of __fn that accepts one two arithmetic arguments, calls +// __fn((double)x, (double)y), and returns a double. +// +// Note this is different from OVERLOAD_1, which generates an overload that +// accepts only *integral* arguments. +#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ + template \ + __DEVICE__ typename __clang_cuda_enable_if< \ + std::numeric_limits<__T1>::is_specialized && \ + std::numeric_limits<__T2>::is_specialized, \ + __retty>::type \ + __fn(__T1 __x, __T2 __y) { \ +return __fn((double)__x, (double)__y); \ + } + `is_specialized` will be true for `long double` args and we'll instantiate the function. Can we/should we produce an error instead? https://reviews.llvm.org/D23627 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D23627: [CUDA] Improve handling of math functions.
jlebar created this revision. jlebar added a reviewer: tra. jlebar added a subscriber: cfe-commits. A bunch of related changes here to our CUDA math headers. - The second arg to nexttoward is a double (well, technically, long double, but we don't have that), not a float. - Add a forward-declare of llround(float), which is defined in the CUDA headers. We need this for the same reason we need most of the other forward-declares: To prevent a constexpr function in our standard library from becoming host+device. - Add nexttowardf implementation. - Pull "foobarf" functions defined by the CUDA headers in the global namespace into namespace std. This lets you do e.g. std::sinf. - Add overloads for math functions accepting integer types. This lets you do e.g. std::sin(0) without having an ambiguity between the overload that takes a float and the one that takes a double. With these changes, we pass testcases derived from libc++ for cmath and math.h. We can check these testcases in to the test-suite once support for CUDA lands there. https://reviews.llvm.org/D23627 Files: clang/lib/Headers/__clang_cuda_cmath.h clang/lib/Headers/__clang_cuda_math_forward_declares.h Index: clang/lib/Headers/__clang_cuda_math_forward_declares.h === --- clang/lib/Headers/__clang_cuda_math_forward_declares.h +++ clang/lib/Headers/__clang_cuda_math_forward_declares.h @@ -140,6 +140,7 @@ __DEVICE__ long lrint(float); __DEVICE__ long lround(double); __DEVICE__ long lround(float); +__DEVICE__ long long llround(float); // No llround(double). __DEVICE__ double modf(double, double *); __DEVICE__ float modf(float, float *); __DEVICE__ double nan(const char *); @@ -149,7 +150,8 @@ __DEVICE__ double nextafter(double, double); __DEVICE__ float nextafter(float, float); __DEVICE__ double nexttoward(double, double); -__DEVICE__ float nexttoward(float, float); +__DEVICE__ float nexttoward(float, double); +__DEVICE__ float nexttowardf(float, double); __DEVICE__ double pow(double, double); __DEVICE__ double pow(double, int); __DEVICE__ float pow(float, float); @@ -235,6 +237,7 @@ using ::logb; using ::lrint; using ::lround; +using ::llround; using ::modf; using ::nan; using ::nanf; Index: clang/lib/Headers/__clang_cuda_cmath.h === --- clang/lib/Headers/__clang_cuda_cmath.h +++ clang/lib/Headers/__clang_cuda_cmath.h @@ -26,13 +26,15 @@ #error "This file is for CUDA compilation only." #endif +#include + // CUDA lets us use various std math functions on the device side. This file // works in concert with __clang_cuda_math_forward_declares.h to make this work. // // Specifically, the forward-declares header declares __device__ overloads for // these functions in the global namespace, then pulls them into namespace std // with 'using' statements. Then this file implements those functions, after -// the implementations have been pulled in. +// their implementations have been pulled in. // // It's important that we declare the functions in the global namespace and pull // them into namespace std with using statements, as opposed to simply declaring @@ -120,12 +122,15 @@ __DEVICE__ float log(float __x) { return ::logf(__x); } __DEVICE__ float log10(float __x) { return ::log10f(__x); } __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } -__DEVICE__ float nexttoward(float __from, float __to) { +__DEVICE__ float nexttoward(float __from, double __to) { return __builtin_nexttowardf(__from, __to); } __DEVICE__ double nexttoward(double __from, double __to) { return __builtin_nexttoward(__from, __to); } +__DEVICE__ float nexttowardf(float __from, double __to) { + return __builtin_nexttowardf(__from, __to); +} __DEVICE__ float pow(float __base, float __exp) { return ::powf(__base, __exp); } @@ -143,6 +148,280 @@ __DEVICE__ float tan(float __x) { return ::tanf(__x); } __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } +// Now we've defined everything we promised we'd define in +// __clang_cuda_math_forward_declares.h. We need to do two additional things to +// fix up our math functions. +// +// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define +//only sin(float) and sin(double), which means that e.g. sin(0) is +//ambiguous. +// +// 2) Pull the __device__ overloads of "foobarf" math functions into namespace +//std. These are defined in the CUDA headers in the global namespace, +//independent of everything else we've done here. + +// We can't use std::enable_if, because we want to be pre-C++11 compatible. But +// we go ahead and unconditionally define functions that are only available when +// compiling for C++11 to match the behavior of the CUDA headers. +template +struct __clang_cuda_enable_if {}; + +template struct __clang_cuda_enable_if { + typedef __T type; +}; + +// Defines an