[Bf-blender-cvs] [420d88d] soc-2016-cycles_images: Bindless Textures: Move bindless_mapping to CUDADevice.

Thomas Dinges Wed, 18 May 2016 16:49:44 -0700

Commit: 420d88d4b6881614935c1107a407a2f7f00db432
Author: Thomas Dinges
Date:   Wed May 18 22:11:45 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rB420d88d4b6881614935c1107a407a2f7f00db432


Bindless Textures: Move bindless_mapping to CUDADevice.

===================================================================

M       intern/cycles/device/device.h
M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/device/device_cuda.cpp
M       intern/cycles/device/device_multi.cpp
M       intern/cycles/device/device_network.cpp
M       intern/cycles/device/device_opencl.cpp
M       intern/cycles/kernel/geom/geom_volume.h
M       intern/cycles/kernel/kernel_compat_cuda.h
M       intern/cycles/kernel/kernel_textures.h
M       intern/cycles/kernel/kernel_types.h
M       intern/cycles/kernel/svm/svm_image.h
M       intern/cycles/kernel/svm/svm_voxel.h
M       intern/cycles/render/image.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 7d48692..2bfcf67 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -226,11 +226,11 @@ public:
                               device_memory& /*mem*/,
                               InterpolationType interpolation = INTERPOLATION_NONE,
                               ExtensionType extension = EXTENSION_REPEAT,
-                              uint *bindless_slot = 0)
+                              int flat_slot = 0)
        {
                (void)interpolation;  /* Ignored. */
                (void)extension;  /* Ignored. */
-               (void)bindless_slot; /* Ignored. */
+               (void)flat_slot; /* Ignored. */
        };
 
        virtual void tex_free(device_memory& /*mem*/) {};
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 3265626..6b6be7c 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -154,7 +154,7 @@ public:
                       device_memory& mem,
                       InterpolationType interpolation,
                       ExtensionType extension,
-                      uint* /*bindless_slot*/)
+                      int /*flat_slot*/)
        {
                VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
                kernel_tex_copy(&kernel_globals,
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 9cbdde2..c28c41e 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -98,6 +98,11 @@ public:
 
        map<device_ptr, PixelMem> pixel_mem_map;
 
+       /* Bindless Textures */
+       CUtexObject bindless_mapping[4096];
+       device_vector<uint> bindless_mapping_device;
+       bool sync_bindless_mapping;
+
        CUdeviceptr cuda_device_ptr(device_ptr mem)
        {
                return (CUdeviceptr)mem;
@@ -180,6 +185,8 @@ public:
                cuDevice = 0;
                cuContext = 0;
 
+               sync_bindless_mapping = false;
+
                /* intialize */
                if(cuda_error(cuInit(0)))
                        return;
@@ -216,6 +223,8 @@ public:
        {
                task_pool.stop();
 
+               tex_free(bindless_mapping_device);
+
                cuda_assert(cuCtxDestroy(cuContext));
        }
 
@@ -469,7 +478,7 @@ public:
                       device_memory& mem,
                       InterpolationType interpolation,
                       ExtensionType extension,
-                      uint *bindless_slot)
+                      int flat_slot)
        {
                VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
@@ -661,7 +670,9 @@ public:
 
                                CUtexObject tex = 0;
                                cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
-                               *bindless_slot = tex;
+                               bindless_mapping[flat_slot] = tex;
+
+                               sync_bindless_mapping = true;
                        }
                        /* Regular Textures - Fermi */
                        else {
@@ -720,6 +731,18 @@ public:
                if(have_error())
                        return;
 
+               /* Upload bindless_mapping vector */
+               if(cuDevArchitecture >= 300) {
+                       if(sync_bindless_mapping) {
+                               uint *tmp = bindless_mapping_device.resize(4096);
+                               for(size_t i = 0; i < 4096; i++) {
+                                       tmp[i] = (uint)bindless_mapping[i];
+                               }
+                               tex_alloc("__bindless_mapping", bindless_mapping_device, INTERPOLATION_NONE, EXTENSION_REPEAT, 0);
+                               sync_bindless_mapping = false;
+                       }
+               }
+
                cuda_push_context();
 
                CUfunction cuPathTrace;
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index f41c65d..34a97db 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -174,13 +174,13 @@ public:
                       InterpolationType
                       interpolation,
                       ExtensionType extension,
-                      uint *bindless_slot)
+                      int flat_slot)
        {
                VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
                foreach(SubDevice& sub, devices) {
                        mem.device_pointer = 0;
-                       sub.device->tex_alloc(name, mem, interpolation, extension, bindless_slot);
+                       sub.device->tex_alloc(name, mem, interpolation, extension, flat_slot);
                        sub.ptr_map[unique_ptr] = mem.device_pointer;
                }
 
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 6bd24cd..449f543 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -167,7 +167,7 @@ public:
                       device_memory& mem,
                       InterpolationType interpolation,
                       ExtensionType extension,
-                      uint *bindless_slot)
+                      int flat_slot)
        {
                VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
@@ -183,7 +183,7 @@ public:
                snd.add(mem);
                snd.add(interpolation);
                snd.add(extension);
-               snd.add(bindless_slot);
+               snd.add(flat_slot);
                snd.write();
                snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
        }
@@ -583,7 +583,7 @@ protected:
                        rcv.read(mem);
                        rcv.read(interpolation);
                        rcv.read(extension_type);
-                       rcv.read(bindless_slot);
+                       rcv.read(flat_slot);
                        lock.unlock();
 
                        client_pointer = mem.device_pointer;
@@ -599,7 +599,7 @@ protected:
 
                        rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
 
-                       device->tex_alloc(name.c_str(), mem, interpolation, extension_type, bindless_slot);
+                       device->tex_alloc(name.c_str(), mem, interpolation, extension_type, flat_slot);
 
                        pointer_mapping_insert(client_pointer, mem.device_pointer);
                }
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index ddd282b..61f83f2 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1186,7 +1186,7 @@ public:
                       device_memory& mem,
                       InterpolationType /*interpolation*/,
                       ExtensionType /*extension*/,
-                      uint* /*bindless_slot*/)
+                      int /*flat_slot*/)
        {
                VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
                mem_alloc(mem, MEM_READ_ONLY);
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 95d2888..2044aaf 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -66,7 +66,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
        float3 P = volume_normalized_position(kg, sd, sd->P);
 #ifdef __KERNEL_GPU__
 #  if __CUDA_ARCH__ >= 300
-       CUtexObject tex = kernel_data.bindless_mapping[id];
+       CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
        float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
        float4 r = make_float4(f, f, f, 1.0);
 #  else
@@ -91,7 +91,7 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s
        float3 P = volume_normalized_position(kg, sd, sd->P);
 #ifdef __KERNEL_GPU__
 #  if __CUDA_ARCH__ >= 300
-       CUtexObject tex = kernel_data.bindless_mapping[id];
+       CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
        float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
 #  else
        float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 5d9c307..4231475 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -73,7 +73,6 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
  * Arrays are necessary in order to use the full VRAM on newer cards, and it's slightly faster.
  * Using Arrays on Fermi turned out to be slower.*/
 
-
 /* Fermi */
 #if __CUDA_ARCH__ < 300
 #  define __KERNEL_CUDA_TEX_STORAGE__
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 87c77ef..285da14 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -175,6 +175,9 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_090)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_091)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_092)
 
+/* bindless textures */
+KERNEL_TEX(uint, texture_uint, __bindless_mapping)
+
 /* packed image (opencl) */
 KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
 KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 8a1bc1b..cc261ed 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1158,7 +1158,6 @@ typedef struct KernelData {
        KernelBVH bvh;
        KernelCurves curve;
        KernelTables tables;
-       uint bindless_mapping[4096]; /*TODO(dingto): Dynamic alloc */
 } KernelData;
 
 #ifdef __KERNEL_DEBUG__
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 0fd04a0..cf101f0 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -18,11 +18,15 @@ CCL_NAMESPACE_BEGIN
 
 /* Float4 textures on various devices. */
 #if defined(__KERNEL_CPU__)
-  #define TEX_NUM_FLOAT4_IMAGES        TEX_NUM_FLOAT4_IMAGES_CPU
+#  define TEX_NUM_FLOAT4_IMAGES        TEX_NUM_FLOAT4_IMAGES_CPU
 #elif defined(__KERNEL_CUDA__)
-  #define TEX_NUM_FLOAT4_IMAGES        TEX_NUM_FLOAT4_IMAGES_CUDA
+#  if __CUDA_ARCH__ < 300
+#    define TEX_NUM_FLOAT4_IMAGES      TEX_NUM_FLOAT4_IMAGES_CUDA
+#  else
+#    define TEX_NUM_FLOAT4_IMAGES      TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER
+#  endif
 #else
-  #define TEX_NUM_FLOAT4_IMAGES        TEX_NUM_FLOAT4_IMAGES_OPENCL
+#  define TEX_NUM_FLOAT4_IMAGES        TEX_NUM_FLOAT4_IMAGES_OPENCL
 #endif
 
 #ifdef __KERNEL_OPENCL__
@@ -260,7 +264,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
                        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
        }
 #else
-       CUtexObject tex = kernel_data.bindless_mapping[id];
+       CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
        if(id < 2048) /* TODO(dingto): Make this a variable */
                r = kernel_tex_image_interp_float4(tex, x, y);
        else {
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 825d76d..d2cc2c3 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -45,7 +45,7 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
        float4 r;
 #  if defined(__KERNEL_GPU__)
 

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [420d88d] soc-2016-cycles_images: Bindless Textures: Move bindless_mapping to CUDADevice.

Reply via email to