Re: [PATCH] D17779: [CUDA] Emit host-side 'shadows' for device-side global variables

2016-03-01 Thread Artem Belevich via cfe-commits
tra updated this revision to Diff 49561.
tra marked 9 inline comments as done.
tra added a comment.

Addressed Justin's comments.


http://reviews.llvm.org/D17779

Files:
  lib/CodeGen/CGCUDANV.cpp
  lib/CodeGen/CGCUDARuntime.h
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCUDA/device-stub.cu
  test/CodeGenCUDA/filter-decl.cu

Index: test/CodeGenCUDA/filter-decl.cu
===
--- test/CodeGenCUDA/filter-decl.cu
+++ test/CodeGenCUDA/filter-decl.cu
@@ -9,15 +9,15 @@
 // CHECK-DEVICE-NOT: module asm "file scope asm is host only"
 __asm__("file scope asm is host only");
 
-// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-HOST: constantdata = internal global
 // CHECK-DEVICE: constantdata = externally_initialized global
 __constant__ char constantdata[256];
 
-// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-HOST: devicedata = internal global
 // CHECK-DEVICE: devicedata = externally_initialized global
 __device__ char devicedata[256];
 
-// CHECK-HOST-NOT: shareddata = global
+// CHECK-HOST: shareddata = internal global
 // CHECK-DEVICE: shareddata = global
 __shared__ char shareddata[256];
 
Index: test/CodeGenCUDA/device-stub.cu
===
--- test/CodeGenCUDA/device-stub.cu
+++ test/CodeGenCUDA/device-stub.cu
@@ -2,6 +2,40 @@
 
 #include "Inputs/cuda.h"
 
+// CHECK-DAG: @device_var = internal global i32
+__device__ int device_var;
+
+// CHECK-DAG: @constant_var = internal global i32
+__constant__ int constant_var;
+
+// CHECK-DAG: @shared_var = internal global i32
+__shared__ int shared_var;
+
+// Make sure host globals don't get internalized...
+// CHECK-DAG: @host_var = global i32
+int host_var;
+// ... and that extern vars remain external.
+// CHECK-DAG: @ext_host_var = external global i32
+extern int ext_host_var;
+
+// Shadows for external device-side variables are *definitions* of
+// those variables.
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __device__ int ext_device_var;
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __constant__ int ext_constant_var;
+
+void use_pointers() {
+  int *p;
+  p = _var;
+  p = _var;
+  p = _var;
+  p = _var;
+  p = _device_var;
+  p = _constant_var;
+  p = _host_var;
+}
+
 // Make sure that all parts of GPU code init/cleanup are there:
 // * constant unnamed string with the kernel name
 // CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
@@ -32,18 +66,23 @@
 // CHECK: call{{.*}}kernelfunc
 void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 
-// Test that we've built a function to register kernels
-// CHECK: define internal void @__cuda_register_kernels
+// Test that we've built a function to register kernels and global vars.
+// CHECK: define internal void @__cuda_register_globals
 // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// CHECK: ret void
 
 // Test that we've built contructor..
 // CHECK: define internal void @__cuda_module_ctor
 //   .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper)
 // CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
 //   .. stores return value in __cuda_gpubin_handle
 // CHECK-NEXT: store{{.*}}__cuda_gpubin_handle
-//   .. and then calls __cuda_register_kernels
-// CHECK-NEXT: call void @__cuda_register_kernels
+//   .. and then calls __cuda_register_globals
+// CHECK-NEXT: call void @__cuda_register_globals
 
 // Test that we've created destructor.
 // CHECK: define internal void @__cuda_module_dtor
Index: lib/CodeGen/CodeGenModule.cpp
===
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -1528,11 +1528,18 @@
   !Global->hasAttr())
 return;
 } else {
-  if (!Global->hasAttr() && (
-Global->hasAttr() ||
-Global->hasAttr() ||
-Global->hasAttr()))
+  // We need to emit host-side 'shadows' for all global
+  // device-side variables because the CUDA runtime needs their
+  // size and host-side address in order to provide access to
+  // their device-side incarnations.
+
+  // So device-only functions are the only things we skip.
+  if (isa(Global) && !Global->hasAttr() &&
+  Global->hasAttr())
 return;
+
+  assert((isa(Global) || isa(Global)) &&
+ "Expected Variable or Function");
 }
   }
 
@@ -1561,8 +1568,15 @@
   } else {
 const auto *VD = cast(Global);
 

Re: [PATCH] D17779: [CUDA] Emit host-side 'shadows' for device-side global variables

2016-03-01 Thread Justin Lebar via cfe-commits
jlebar added inline comments.


Comment at: lib/CodeGen/CGCUDANV.cpp:168
@@ -163,1 +167,3 @@
+/// of global scope device-side variables generated in this module
+/// with the CUDA runtime.
 /// \code

This is kind of hard to parse.  How about rephrasing to something like:

Creates a function that sets up state on the host side for CUDA objects that 
have a presence on both the host and device sides.  Specifically, registers the 
host side of kernel functions and __device__ global variables with the CUDA 
runtime.


Comment at: lib/CodeGen/CGCUDANV.cpp:213
@@ +212,3 @@
+  // void __cudaRegisterVar(void **, char *, char *, const char *,
+  //int, int, int, int)
+  std::vector RegisterVarParams = {

Can we say what these args mean?


Comment at: lib/CodeGen/CGCUDANV.cpp:224
@@ +223,3 @@
+llvm::Constant *VarName = makeConstantString(Var->getName());
+llvm::Value *args[] = {
+, Builder.CreateBitCast(Var, VoidPtrTy), VarName,

Nit: s/args/Args/?


Comment at: lib/CodeGen/CGCUDANV.cpp:228
@@ +227,3 @@
+llvm::ConstantInt::get(IntTy, CGM.getDataLayout().getTypeAllocSize(
+  Var->getValueType())), // sizeof(var)
+llvm::ConstantInt::get(IntTy, (Flags & DevVarConst) ? 1 : 0),

Nit: Maybe pull this expression out as a separate var?  Then the comment isn't 
needed (would be nice, because at the moment it's ambiguous exactly what 
"sizeof(var)" refers to.


Comment at: lib/CodeGen/CodeGenModule.cpp:1532
@@ +1531,3 @@
+  // We need to emit host-side 'shadows' for all global
+  // device-side variables because CUDA runtime API needs their
+  // size and host-side address in order to provide access to

s/CUDA runtime API/the CUDA runtime/ (not really a requirement of the API, I 
think?)


Comment at: lib/CodeGen/CodeGenModule.cpp:1575
@@ +1574,3 @@
+  // definition, because we still need to define host-side shadow
+  // for it.
+} else if (VD->isThisDeclarationADefinition() != VarDecl::Definition &&

Kind of an odd way of writing this control flow?  Could we phrase it more 
idiomatically as

  MustEmitForCUDA = !VD->hasDefinition() && ...;
  if (!MustEmitForCUDA && ...) return;


Comment at: lib/CodeGen/CodeGenModule.cpp:2477
@@ +2476,3 @@
+  if (D->hasAttr() || D->hasAttr()) {
+Linkage = llvm::GlobalValue::InternalLinkage;
+

Is it worth explaining why the shadows get internal linkage?


Comment at: lib/CodeGen/CodeGenModule.cpp:2480
@@ +2479,3 @@
+// Shadow variables and their properties must be registered
+// with CUDA runtime.
+unsigned Flags = 0;

with the CUDA runtime


Comment at: lib/CodeGen/CodeGenModule.cpp:2483
@@ +2482,3 @@
+if (!D->hasDefinition())
+  Flags |= CGCUDARuntime::DevVarExt;
+if (D->hasAttr())

Now that I see them in context, I think these flags would be a lot easier to 
handle if they employed less abbreviation.  "ExternalDeviceVar", 
"ConstDeviceVar"?


Comment at: test/CodeGenCUDA/device-stub.cu:14
@@ +13,3 @@
+
+// Make sure host globals don't get internalized..
+// CHECK-DAG: @host_var = global i32

Not sure if this is a typo or if you mean "...".


Comment at: test/CodeGenCUDA/device-stub.cu:17
@@ +16,3 @@
+int host_var;
+// .. and that extern vars remain external.
+// CHECK-DAG: @ext_host_var = external global i32

Here you do seem to mean "..."


http://reviews.llvm.org/D17779



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D17779: [CUDA] Emit host-side 'shadows' for device-side global variables

2016-03-01 Thread Artem Belevich via cfe-commits
tra created this revision.
tra added reviewers: jlebar, jingyue.
tra added a subscriber: cfe-commits.

.. and register them with CUDA runtime.

This is needed for commonly used cudaMemcpy*() APIs that use address of 
host-side shadow to access their counterparts on device side.

Fixes PR26340.

http://reviews.llvm.org/D17779

Files:
  lib/CodeGen/CGCUDANV.cpp
  lib/CodeGen/CGCUDARuntime.h
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCUDA/device-stub.cu
  test/CodeGenCUDA/filter-decl.cu

Index: test/CodeGenCUDA/filter-decl.cu
===
--- test/CodeGenCUDA/filter-decl.cu
+++ test/CodeGenCUDA/filter-decl.cu
@@ -9,11 +9,11 @@
 // CHECK-DEVICE-NOT: module asm "file scope asm is host only"
 __asm__("file scope asm is host only");
 
-// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-HOST: constantdata = internal global
 // CHECK-DEVICE: constantdata = externally_initialized global
 __constant__ char constantdata[256];
 
-// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-HOST: devicedata = internal global
 // CHECK-DEVICE: devicedata = externally_initialized global
 __device__ char devicedata[256];
 
Index: test/CodeGenCUDA/device-stub.cu
===
--- test/CodeGenCUDA/device-stub.cu
+++ test/CodeGenCUDA/device-stub.cu
@@ -2,6 +2,40 @@
 
 #include "Inputs/cuda.h"
 
+// CHECK-DAG: @device_var = internal global i32
+__device__ int device_var;
+
+// CHECK-DAG: @constant_var = internal global i32
+__constant__ int constant_var;
+
+// CHECK-DAG: @shared_var = internal global i32
+__shared__ int shared_var;
+
+// Make sure host globals don't get internalized..
+// CHECK-DAG: @host_var = global i32
+int host_var;
+// .. and that extern vars remain external.
+// CHECK-DAG: @ext_host_var = external global i32
+extern int ext_host_var;
+
+// Shadows for external device-side variables are *definitions* of
+// those variables.
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __device__ int ext_device_var;
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __constant__ int ext_constant_var;
+
+void use_pointers() {
+  int *p;
+  p = _var;
+  p = _var;
+  p = _var;
+  p = _var;
+  p = _device_var;
+  p = _constant_var;
+  p = _host_var;
+}
+
 // Make sure that all parts of GPU code init/cleanup are there:
 // * constant unnamed string with the kernel name
 // CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
@@ -32,18 +66,23 @@
 // CHECK: call{{.*}}kernelfunc
 void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 
-// Test that we've built a function to register kernels
-// CHECK: define internal void @__cuda_register_kernels
+// Test that we've built a function to register kernels and global vars.
+// CHECK: define internal void @__cuda_register_globals
 // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// CHECK: ret void
 
 // Test that we've built contructor..
 // CHECK: define internal void @__cuda_module_ctor
 //   .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper)
 // CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
 //   .. stores return value in __cuda_gpubin_handle
 // CHECK-NEXT: store{{.*}}__cuda_gpubin_handle
-//   .. and then calls __cuda_register_kernels
-// CHECK-NEXT: call void @__cuda_register_kernels
+//   .. and then calls __cuda_register_globals
+// CHECK-NEXT: call void @__cuda_register_globals
 
 // Test that we've created destructor.
 // CHECK: define internal void @__cuda_module_dtor
Index: lib/CodeGen/CodeGenModule.cpp
===
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -1528,11 +1528,18 @@
   !Global->hasAttr())
 return;
 } else {
-  if (!Global->hasAttr() && (
-Global->hasAttr() ||
-Global->hasAttr() ||
-Global->hasAttr()))
+  // We need to emit host-side 'shadows' for all global
+  // device-side variables because CUDA runtime API needs their
+  // size and host-side address in order to provide access to
+  // their device-side incarnations.
+
+  // So device-only functions are the only things we skip.
+  if (isa(Global) && !Global->hasAttr() &&
+  Global->hasAttr())
 return;
+
+  assert((isa(Global) || isa(Global)) &&
+ "Expected Variable or Function");
 }
   }
 
@@ -1561,9 +1568,13 @@
   } else {
 const auto *VD = cast(Global);