[Bf-blender-cvs] [23098cda993] master: Code refactor: make texture code more consistent between devices.

Brecht Van Lommel Sat, 07 Oct 2017 06:57:53 -0700

Commit: 23098cda9936d785988b689ee69e58e900f17cb2
Author: Brecht Van Lommel
Date:   Fri Oct 6 21:47:41 2017 +0200
Branches: master
https://developer.blender.org/rB23098cda9936d785988b689ee69e58e900f17cb2


Code refactor: make texture code more consistent between devices.

* Use common TextureInfo struct for all devices, except CUDA Fermi.
* Move image sampling code to kernels/*/kernel_*_image.h files.
* Use arrays for data textures on Fermi too, so device_vector<Struct> works.

===================================================================

M       intern/cycles/device/device.h
M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/device/device_cuda.cpp
M       intern/cycles/device/opencl/opencl.h
M       intern/cycles/device/opencl/opencl_base.cpp
M       intern/cycles/device/opencl/opencl_split.cpp
M       intern/cycles/kernel/CMakeLists.txt
M       intern/cycles/kernel/geom/geom_volume.h
M       intern/cycles/kernel/kernel_compat_cpu.h
M       intern/cycles/kernel/kernel_compat_cuda.h
M       intern/cycles/kernel/kernel_compat_opencl.h
M       intern/cycles/kernel/kernel_globals.h
M       intern/cycles/kernel/kernel_textures.h
M       intern/cycles/kernel/kernels/cpu/kernel.cpp
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M       intern/cycles/kernel/kernels/cuda/kernel.cu
A       intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
M       intern/cycles/kernel/kernels/opencl/kernel.cl
R086    intern/cycles/kernel/kernel_image_opencl.h      intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
M       intern/cycles/kernel/osl/osl_services.cpp
M       intern/cycles/kernel/split/kernel_split_common.h
M       intern/cycles/kernel/svm/svm_image.h
M       intern/cycles/kernel/svm/svm_voxel.h
M       intern/cycles/util/util_texture.h
M       intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 26d6d380a10..0e0a0079209 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -26,6 +26,7 @@
 #include "util/util_stats.h"
 #include "util/util_string.h"
 #include "util/util_thread.h"
+#include "util/util_texture.h"
 #include "util/util_types.h"
 #include "util/util_vector.h"
 
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 19e3c0a9075..ac6d3246d38 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -163,6 +163,9 @@ public:
        TaskPool task_pool;
        KernelGlobals kernel_globals;
 
+       device_vector<TextureInfo> texture_info;
+       bool need_texture_info;
+
 #ifdef WITH_OSL
        OSLGlobals osl_globals;
 #endif
@@ -235,6 +238,8 @@ public:
                        VLOG(1) << "Will be using split kernel.";
                }
 
+               need_texture_info = false;
+
 #define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions<void(*)(KernelGlobals*, KernelData*)>(KERNEL_FUNCTIONS(name))
                REGISTER_SPLIT_KERNEL(path_init);
                REGISTER_SPLIT_KERNEL(scene_intersect);
@@ -261,6 +266,7 @@ public:
        ~CPUDevice()
        {
                task_pool.stop();
+               tex_free(texture_info);
        }
 
        virtual bool show_samples() const
@@ -268,6 +274,15 @@ public:
                return (TaskScheduler::num_threads() == 1);
        }
 
+       void load_texture_info()
+       {
+               if(need_texture_info) {
+                       tex_free(texture_info);
+                       tex_alloc("__texture_info", texture_info, 
INTERPOLATION_NONE, EXTENSION_REPEAT);
+                       need_texture_info = false;
+               }
+       }
+
        void mem_alloc(const char *name, device_memory& mem, MemoryType 
/*type*/)
        {
                if(name) {
@@ -333,14 +348,47 @@ public:
                VLOG(1) << "Texture allocate: " << name << ", "
                        << string_human_readable_number(mem.memory_size()) << " 
bytes. ("
                        << string_human_readable_size(mem.memory_size()) << ")";
-               kernel_tex_copy(&kernel_globals,
-                               name,
-                               mem.data_pointer,
-                               mem.data_width,
-                               mem.data_height,
-                               mem.data_depth,
-                               interpolation,
-                               extension);
+
+               if(interpolation == INTERPOLATION_NONE) {
+                       /* Data texture. */
+                       kernel_tex_copy(&kernel_globals,
+                                                       name,
+                                                       mem.data_pointer,
+                                                       mem.data_width,
+                                                       mem.data_height,
+                                                       mem.data_depth,
+                                                       interpolation,
+                                                       extension);
+               }
+               else {
+                       /* Image Texture. */
+                       int flat_slot = 0;
+                       if(string_startswith(name, "__tex_image")) {
+                               int pos =  string(name).rfind("_");
+                               flat_slot = atoi(name + pos + 1);
+                       }
+                       else {
+                               assert(0);
+                       }
+
+                       if(flat_slot >= texture_info.size()) {
+                               /* Allocate some slots in advance, to reduce 
amount
+                                * of re-allocations. */
+                               texture_info.resize(flat_slot + 128);
+                       }
+
+                       TextureInfo& info = texture_info.get_data()[flat_slot];
+                       info.data = (uint64_t)mem.data_pointer;
+                       info.cl_buffer = 0;
+                       info.interpolation = interpolation;
+                       info.extension = extension;
+                       info.width = mem.data_width;
+                       info.height = mem.data_height;
+                       info.depth = mem.data_depth;
+
+                       need_texture_info = true;
+               }
+
                mem.device_pointer = mem.data_pointer;
                mem.device_size = mem.memory_size();
                stats.mem_alloc(mem.device_size);
@@ -352,6 +400,7 @@ public:
                        mem.device_pointer = 0;
                        stats.mem_free(mem.device_size);
                        mem.device_size = 0;
+                       need_texture_info = true;
                }
        }
 
@@ -784,6 +833,9 @@ public:
 
        void task_add(DeviceTask& task)
        {
+               /* Load texture info. */
+               load_texture_info();
+
                /* split task into smaller ones */
                list<DeviceTask> tasks;
 
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 734edcff503..dcbe6033bcc 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -129,7 +129,7 @@ public:
        CUcontext cuContext;
        CUmodule cuModule, cuFilterModule;
        map<device_ptr, bool> tex_interp_map;
-       map<device_ptr, uint> tex_bindless_map;
+       map<device_ptr, CUtexObject> tex_bindless_map;
        int cuDevId;
        int cuDevArchitecture;
        bool first_error;
@@ -145,8 +145,8 @@ public:
        map<device_ptr, PixelMem> pixel_mem_map;
 
        /* Bindless Textures */
-       device_vector<uint> bindless_mapping;
-       bool need_bindless_mapping;
+       device_vector<TextureInfo> texture_info;
+       bool need_texture_info;
 
        CUdeviceptr cuda_device_ptr(device_ptr mem)
        {
@@ -231,7 +231,7 @@ public:
 
                split_kernel = NULL;
 
-               need_bindless_mapping = false;
+               need_texture_info = false;
 
                /* intialize */
                if(cuda_error(cuInit(0)))
@@ -274,7 +274,7 @@ public:
                delete split_kernel;
 
                if(info.has_bindless_textures) {
-                       tex_free(bindless_mapping);
+                       tex_free(texture_info);
                }
 
                cuda_assert(cuCtxDestroy(cuContext));
@@ -544,12 +544,12 @@ public:
                return (result == CUDA_SUCCESS);
        }
 
-       void load_bindless_mapping()
+       void load_texture_info()
        {
-               if(info.has_bindless_textures && need_bindless_mapping) {
-                       tex_free(bindless_mapping);
-                       tex_alloc("__bindless_mapping", bindless_mapping, 
INTERPOLATION_NONE, EXTENSION_REPEAT);
-                       need_bindless_mapping = false;
+               if(info.has_bindless_textures && need_texture_info) {
+                       tex_free(texture_info);
+                       tex_alloc("__texture_info", texture_info, 
INTERPOLATION_NONE, EXTENSION_REPEAT);
+                       need_texture_info = false;
                }
        }
 
@@ -646,8 +646,7 @@ public:
                        << string_human_readable_number(mem.memory_size()) << " 
bytes. ("
                        << string_human_readable_size(mem.memory_size()) << ")";
 
-               /* Check if we are on sm_30 or above.
-                * We use arrays and bindles textures for storage there */
+               /* Check if we are on sm_30 or above, for bindless textures. */
                bool has_bindless_textures = info.has_bindless_textures;
 
                /* General variables for both architectures */
@@ -679,20 +678,10 @@ public:
                        filter_mode = CU_TR_FILTER_MODE_LINEAR;
                }
 
-               CUarray_format_enum format;
-               switch(mem.data_type) {
-                       case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; 
break;
-                       case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; 
break;
-                       case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; 
break;
-                       case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
-                       case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
-                       default: assert(0); return;
-               }
-
                /* General variables for Fermi */
                CUtexref texref = NULL;
 
-               if(!has_bindless_textures) {
+               if(!has_bindless_textures && interpolation != 
INTERPOLATION_NONE) {
                        if(mem.data_depth > 1) {
                                /* Kernel uses different bind names for 2d and 
3d float textures,
                                 * so we have to adjust couple of things here.
@@ -711,41 +700,41 @@ public:
                        }
                }
 
-               /* Data Storage */
                if(interpolation == INTERPOLATION_NONE) {
-                       if(has_bindless_textures) {
-                               mem_alloc(NULL, mem, MEM_READ_ONLY);
-                               mem_copy_to(mem);
+                       /* Data Storage */
+                       mem_alloc(NULL, mem, MEM_READ_ONLY);
+                       mem_copy_to(mem);
 
-                               CUdeviceptr cumem;
-                               size_t cubytes;
+                       CUdeviceptr cumem;
+                       size_t cubytes;
 
-                               cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, 
cuModule, bind_name.c_str()));
+                       cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, 
cuModule, bind_name.c_str()));
 
-                               if(cubytes == 8) {
-                                       /* 64 bit device pointer */
-                                       uint64_t ptr = mem.device_pointer;
-                                       cuda_assert(cuMemcpyHtoD(cumem, 
(void*)&ptr, cubytes));
-                               }
-                               else {
-                                       /* 32 bit device pointer */
-                                       uint32_t ptr = 
(uint32_t)mem.device_pointer;
-                                       cuda_assert(cuMemcpyHtoD(cumem, 
(void*)&ptr, cubytes));
-                               }
+                       if(cubytes == 8) {
+                               /* 64 bit device pointer */
+                               uint64_t ptr = mem.device_pointer;
+                               cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, 
cubytes));
                        }
                        else {
-                               mem_alloc(NULL, mem, MEM_READ_ONLY);
-                               mem_copy_to(mem);
-
-                               cuda_assert(cuTexRefSetAddress(NULL, texref, 
cuda_device_ptr(mem.device_pointer), size));
-                               cuda_assert(cuTexRefSetFilterMode(texref, 
CU_TR_FILTER_MODE_POINT));
-                               cuda_assert(cuTexRefSetFlags(texref, 
CU_TRSF_READ_AS_INTEGER));
+                               /* 32 bit device pointer */
+                               uint32_t ptr = (uint32_t)mem.device_pointer;
+                               cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, 
cubytes));
                        }
                }
-               /* Texture Storage */
                else {
+                       /* Texture Storage */
                        CUarray handle = NULL;
 
+                       CUarray_format_enum format;
+                       switch(mem.data_type) {
+                               case TYPE_UCHAR: format = 
CU_AD_FORMAT_UNSIGNED_INT8; break;
+                               case TYPE_UINT: format = 
CU_AD_FORMAT_UNSIGNED_INT32; break;
+                               case TYPE_INT: format = 
CU_AD_FORMAT_SIGNED_INT32; break;
+                               case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; 
break;
+                               case TYPE_HALF: format = CU_AD_FORMAT_HALF; 
break;
+                               default: assert(0); return;
+                       }
+
                        if(mem.data_depth > 1) {
                                CUDA_ARRAY3D_DESCRIPTOR desc;
 
@@ -810,8 +799,8 @@ public:
 
                        stats.mem_alloc(size);
 
-                       /* Bindless Textures - Kepler */
                        if(has_bindless_textures) {
+                               /* Bindless Textures - Kepler */
                                int flat_slot = 0;
                                if(string_startswith(name, "__tex_image")) {
                                        int pos =  string(name).rfind("_");
@@ -844,35 +833,39 @@ public:
                                }
 
                                /* Resize once */
-                               if(flat_slot >= bindless_mapping.size()) {
+                               if(flat_slot >= texture_info.size()) {
                                        /* Allocate some slots in advance, to 
reduce amount
-                                        * of re-allocations.
-                                        */
-                                       bindless_mapping.resize(flat_slot + 
128);
+                                        * of re-allocations. */
+                                       texture_info.resize(flat_slot + 128);
                                }
 
                                /* Set Mapping and tag that we need to 
(re-)upload to device */
-                               bindless_mapping.get_data()[flat_slot] = 
(uint)tex;
-                               tex_bindless_map[mem.device_pointer] = 
(uint)tex;
-                               need_bindless_mapping = true;
+                               TextureInfo& info = 
texture_info.get_data()[flat_slot];
+                               info.data = (uint64_t)tex;
+                               info.cl_buffer = 0;
+                               info.interpolation = interpolation;
+                               info.extension = extension;
+                               info.width = mem.data_width;
+                               info.height = mem.data_height;
+                               info.depth = mem.data_depth;
+
+                               tex_bindless_map[mem.device_pointer] = tex;
+                               need_texture_info = true;
                        }
-                       /* Regular Textures - Fermi */
                        else {
+                               /* Regular Textures - Fermi */
                                cuda_assert(cuTexRefSetArray(texref, handle, 
CU_TRSA_OVERRIDE_FORMAT));
                                cuda_assert(cuTexRefSetFilterMode(texref, 
filter_mode));
                                cuda_assert(cuTexRefSetFlags(texref, CU_TRS

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [23098cda993] master: Code refactor: make texture code more consistent between devices.

Reply via email to