Hi pcc, eliben,

Allow CUDA host device functions with two code paths using __CUDA_ARCH__ to 
differentiate between code path being compiled.

For example,
__host__ __device__ void host_device_function(void) {
#ifdef __CUDA_ARCH__
  device_only_function();
#else
  host_only_function();
#endif
}

http://reviews.llvm.org/D6457

Files:
  lib/Sema/SemaCUDA.cpp
  test/SemaCUDA/function-target.cu
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -14,6 +14,7 @@
 #include "clang/Sema/Sema.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
@@ -85,8 +86,18 @@
       (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
     return true;
 
-  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice)
-    return true;
+  // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
+  // however, in which case the function is compiled for both the host and the
+  // device. The __CUDA_ARCH__ macro... can be used to differentiate code paths
+  // between host and device."
+  bool InDeviceMode = getPreprocessor()
+                          .getIdentifierInfo("__CUDA_ARCH__")
+                          ->hasMacroDefinition();
+  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
+    if ((InDeviceMode && CalleeTarget != CFT_Device) ||
+        (!InDeviceMode && CalleeTarget != CFT_Host))
+      return true;
+  }
 
   return false;
 }
Index: test/SemaCUDA/function-target.cu
===================================================================
--- test/SemaCUDA/function-target.cu
+++ test/SemaCUDA/function-target.cu
@@ -36,9 +36,30 @@
 __host__ __device__ void hd1hd(void);
 __global__ void hd1g(void); // expected-note {{'hd1g' declared here}}
 
-__host__ __device__ void hd1(void) {
-  hd1h(); // expected-error {{no matching function}}
+// __CUDA_ARCH__ is not normally defined and undefined from within a program,
+// but rather originates from the compiler. It is being explicitly manipulated
+// here to test both compilation paths.
+#undef __CUDA_ARCH__
+__host__ __device__ void hd1_hostmode(void) {
+  hd1h(); // no longer an error as assumed to be in host mode
   hd1d(); // expected-error {{no matching function}}
+#ifdef __CUDA_ARCH__
+  hd1d(); // no longer an error as guarded
+#else
+  hd1h(); // no longer an error as guarded
+#endif
   hd1hd();
   hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}}
 }
+
+#define __CUDA_ARCH__ 250
+__host__ __device__ void hd1_devicemode(void) {
+  hd1h(); // expected-error {{no matching function}}
+  hd1d(); // no longer an error as assumed to be in device mode
+#ifdef __CUDA_ARCH__
+  hd1d(); // no longer an error as guarded
+#else
+  hd1h(); // no longer an error as guarded
+#endif
+  hd1hd();
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to