Hi pcc, eliben,
Allow CUDA host device functions with two code paths using __CUDA_ARCH__ to
differentiate between code path being compiled.
For example,
__host__ __device__ void host_device_function(void) {
#ifdef __CUDA_ARCH__
device_only_function();
#else
host_only_function();
#endif
}
http://reviews.llvm.org/D6457
Files:
lib/Sema/SemaCUDA.cpp
test/SemaCUDA/function-target.cu
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -14,6 +14,7 @@
#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -85,8 +86,18 @@
(CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
return true;
- if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice)
- return true;
+ // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
+ // however, in which case the function is compiled for both the host and the
+ // device. The __CUDA_ARCH__ macro... can be used to differentiate code paths
+ // between host and device."
+ bool InDeviceMode = getPreprocessor()
+ .getIdentifierInfo("__CUDA_ARCH__")
+ ->hasMacroDefinition();
+ if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
+ if ((InDeviceMode && CalleeTarget != CFT_Device) ||
+ (!InDeviceMode && CalleeTarget != CFT_Host))
+ return true;
+ }
return false;
}
Index: test/SemaCUDA/function-target.cu
===================================================================
--- test/SemaCUDA/function-target.cu
+++ test/SemaCUDA/function-target.cu
@@ -36,9 +36,30 @@
__host__ __device__ void hd1hd(void);
__global__ void hd1g(void); // expected-note {{'hd1g' declared here}}
-__host__ __device__ void hd1(void) {
- hd1h(); // expected-error {{no matching function}}
+// __CUDA_ARCH__ is not normally defined and undefined from within a program,
+// but rather originates from the compiler. It is being explicitly manipulated
+// here to test both compilation paths.
+#undef __CUDA_ARCH__
+__host__ __device__ void hd1_hostmode(void) {
+ hd1h(); // no longer an error as assumed to be in host mode
hd1d(); // expected-error {{no matching function}}
+#ifdef __CUDA_ARCH__
+ hd1d(); // no longer an error as guarded
+#else
+ hd1h(); // no longer an error as guarded
+#endif
hd1hd();
hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}}
}
+
+#define __CUDA_ARCH__ 250
+__host__ __device__ void hd1_devicemode(void) {
+ hd1h(); // expected-error {{no matching function}}
+ hd1d(); // no longer an error as assumed to be in device mode
+#ifdef __CUDA_ARCH__
+ hd1d(); // no longer an error as guarded
+#else
+ hd1h(); // no longer an error as guarded
+#endif
+ hd1hd();
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits