Commit: 5801ef71e40bc932c69e67f06076cd8b41132e52
Author: Brecht Van Lommel
Date:   Sun Nov 5 00:34:30 2017 +0100
Branches: master
https://developer.blender.org/rB5801ef71e40bc932c69e67f06076cd8b41132e52

Code refactor: device memory cleanups, preparing for mapped host memory.

===================================================================

M       intern/cycles/device/device.cpp
M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/device/device_cuda.cpp
M       intern/cycles/device/device_memory.cpp
M       intern/cycles/device/device_memory.h
M       intern/cycles/device/device_network.cpp
M       intern/cycles/device/device_network.h
M       intern/cycles/device/device_split_kernel.cpp
M       intern/cycles/device/opencl/memory_manager.cpp
M       intern/cycles/device/opencl/opencl_base.cpp
M       intern/cycles/kernel/kernel.h
M       intern/cycles/kernel/kernels/cpu/kernel.cpp
M       intern/cycles/render/bake.cpp
M       intern/cycles/render/buffers.cpp
M       intern/cycles/render/light.cpp
M       intern/cycles/render/mesh_displace.cpp
M       intern/cycles/render/object.cpp
M       intern/cycles/render/tables.cpp

===================================================================

diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index b2f20bab58b..641e3fde140 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -102,17 +102,17 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
        if(rgba.data_type == TYPE_HALF) {
                /* for multi devices, this assumes the inefficient method that 
we allocate
                 * all pixels on the device even though we only render to a 
subset */
-               GLhalf *data_pointer = (GLhalf*)rgba.data_pointer;
+               GLhalf *host_pointer = (GLhalf*)rgba.host_pointer;
                float vbuffer[16], *basep;
                float *vp = NULL;
 
-               data_pointer += 4*y*w;
+               host_pointer += 4*y*w;
 
                /* draw half float texture, GLSL shader for display transform 
assumed to be bound */
                GLuint texid;
                glGenTextures(1, &texid);
                glBindTexture(GL_TEXTURE_2D, texid);
-               glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, 
GL_RGBA, GL_HALF_FLOAT, data_pointer);
+               glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, 
GL_RGBA, GL_HALF_FLOAT, host_pointer);
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, 
GL_NEAREST);
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, 
GL_NEAREST);
 
@@ -194,7 +194,7 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
                glPixelZoom((float)width/(float)w, (float)height/(float)h);
                glRasterPos2f(dx, dy);
 
-               uint8_t *pixels = (uint8_t*)rgba.data_pointer;
+               uint8_t *pixels = (uint8_t*)rgba.host_pointer;
 
                pixels += 4*y*w;
 
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 0c0e6af7eb4..1a54c3380ee 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -297,10 +297,14 @@ public:
                                                << 
string_human_readable_size(mem.memory_size()) << ")";
                        }
 
-                       mem.device_pointer = mem.data_pointer;
-
-                       if(!mem.device_pointer) {
-                               mem.device_pointer = 
(device_ptr)malloc(mem.memory_size());
+                       if(mem.type == MEM_DEVICE_ONLY) {
+                               assert(!mem.host_pointer);
+                               size_t alignment = mem_address_alignment();
+                               void *data = 
util_aligned_malloc(mem.memory_size(), alignment);
+                               mem.device_pointer = (device_ptr)data;
+                       }
+                       else {
+                               mem.device_pointer = 
(device_ptr)mem.host_pointer;
                        }
 
                        mem.device_size = mem.memory_size();
@@ -350,8 +354,8 @@ public:
                        tex_free(mem);
                }
                else if(mem.device_pointer) {
-                       if(!mem.data_pointer) {
-                               free((void*)mem.device_pointer);
+                       if(mem.type == MEM_DEVICE_ONLY) {
+                               util_aligned_free((void*)mem.device_pointer);
                        }
                        mem.device_pointer = 0;
                        stats.mem_free(mem.device_size);
@@ -379,7 +383,7 @@ public:
                        /* Data texture. */
                        kernel_tex_copy(&kernel_globals,
                                                        mem.name,
-                                                       mem.data_pointer,
+                                                       mem.host_pointer,
                                                        mem.data_size);
                }
                else {
@@ -400,7 +404,7 @@ public:
                        }
 
                        TextureInfo& info = texture_info[flat_slot];
-                       info.data = (uint64_t)mem.data_pointer;
+                       info.data = (uint64_t)mem.host_pointer;
                        info.cl_buffer = 0;
                        info.interpolation = mem.interpolation;
                        info.extension = mem.extension;
@@ -411,7 +415,7 @@ public:
                        need_texture_info = true;
                }
 
-               mem.device_pointer = mem.data_pointer;
+               mem.device_pointer = (device_ptr)mem.host_pointer;
                mem.device_size = mem.memory_size();
                stats.mem_alloc(mem.device_size);
        }
@@ -457,7 +461,7 @@ public:
 
        bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
        {
-               TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
+               TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
                for(int i = 0; i < 9; i++) {
                        tiles->buffers[i] = buffers[i];
                }
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 59d4fb055d0..4ab3cb9da75 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -128,20 +128,26 @@ public:
        CUdevice cuDevice;
        CUcontext cuContext;
        CUmodule cuModule, cuFilterModule;
-       map<device_ptr, bool> tex_interp_map;
-       map<device_ptr, CUtexObject> tex_bindless_map;
        int cuDevId;
        int cuDevArchitecture;
        bool first_error;
        CUDASplitKernel *split_kernel;
 
+       struct CUDAMem {
+               CUDAMem()
+               : texobject(0), array(0) {}
+
+               CUtexObject texobject;
+               CUarray array;
+       };
+       map<device_memory*, CUDAMem> cuda_mem_map;
+
        struct PixelMem {
                GLuint cuPBO;
                CUgraphicsResource cuPBOresource;
                GLuint cuTexId;
                int w, h;
        };
-
        map<device_ptr, PixelMem> pixel_mem_map;
 
        /* Bindless Textures */
@@ -615,7 +621,7 @@ public:
                }
        }
 
-       void generic_alloc(device_memory& mem, size_t padding = 0)
+       CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0)
        {
                CUDAContextScope scope(this);
 
@@ -625,19 +631,28 @@ public:
                                        << 
string_human_readable_size(mem.memory_size()) << ")";
                }
 
-               CUdeviceptr device_pointer;
+               /* Allocate memory on device. */
+               CUdeviceptr device_pointer = 0;
                size_t size = mem.memory_size();
                cuda_assert(cuMemAlloc(&device_pointer, size + padding));
                mem.device_pointer = (device_ptr)device_pointer;
                mem.device_size = size;
                stats.mem_alloc(size);
+
+               if(!mem.device_pointer) {
+                       return NULL;
+               }
+
+               /* Insert into map of allocations. */
+               CUDAMem *cmem = &cuda_mem_map[&mem];
+               return cmem;
        }
 
        void generic_copy_to(device_memory& mem)
        {
                if(mem.device_pointer) {
                        CUDAContextScope scope(this);
-                       
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), 
(void*)mem.data_pointer, mem.memory_size()));
+                       
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, 
mem.memory_size()));
                }
        }
 
@@ -648,10 +663,11 @@ public:
 
                        
cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
 
-                       mem.device_pointer = 0;
-
                        stats.mem_free(mem.device_size);
+                       mem.device_pointer = 0;
                        mem.device_size = 0;
+
+                       cuda_mem_map.erase(cuda_mem_map.find(&mem));
                }
        }
 
@@ -700,11 +716,11 @@ public:
                        size_t size = elem*w*h;
 
                        if(mem.device_pointer) {
-                               
cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
+                               
cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
                                                                                
 (CUdeviceptr)(mem.device_pointer + offset), size));
                        }
                        else {
-                               memset((char*)mem.data_pointer + offset, 0, 
size);
+                               memset((char*)mem.host_pointer + offset, 0, 
size);
                        }
                }
        }
@@ -715,8 +731,8 @@ public:
                        mem_alloc(mem);
                }
 
-               if(mem.data_pointer) {
-                       memset((void*)mem.data_pointer, 0, mem.memory_size());
+               if(mem.host_pointer) {
+                       memset(mem.host_pointer, 0, mem.memory_size());
                }
 
                if(mem.device_pointer) {
@@ -814,8 +830,6 @@ public:
                                uint32_t ptr = (uint32_t)mem.device_pointer;
                                cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, 
cubytes));
                        }
-
-                       tex_interp_map[mem.device_pointer] = false;
                        return;
                }
 
@@ -851,7 +865,7 @@ public:
                        default: assert(0); return;
                }
 
-
+               CUDAMem *cmem = NULL;
                CUarray array_3d = NULL;
                size_t src_pitch = mem.data_width * dsize * mem.data_elements;
                size_t dst_pitch = src_pitch;
@@ -878,7 +892,7 @@ public:
                        param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
                        param.dstArray = array_3d;
                        param.srcMemoryType = CU_MEMORYTYPE_HOST;
-                       param.srcHost = (void*)mem.data_pointer;
+                       param.srcHost = mem.host_pointer;
                        param.srcPitch = src_pitch;
                        param.WidthInBytes = param.srcPitch;
                        param.Height = mem.data_height;
@@ -889,6 +903,10 @@ public:
                        mem.device_pointer = (device_ptr)array_3d;
                        mem.device_size = size;
                        stats.mem_alloc(size);
+
+                       cmem = &cuda_mem_map[&mem];
+                       cmem->texobject = 0;
+                       cmem->array = array_3d;
                }
                else if(mem.data_height > 1) {
                        /* 2D texture, using pitch aligned linear memory. */
@@ -897,7 +915,10 @@ public:
                        dst_pitch = align_up(src_pitch, alignment);
                        size_t dst_size = dst_pitch * mem.data_height;
 
-                       generic_alloc(mem, dst_size - mem.memory_size());
+                       cmem = generic_alloc(mem, dst_size - mem.memory_size());
+                       if(!cmem) {
+                               return;
+                       }
 
                        CUDA_MEMCPY2D param;
                        memset(&param, 0, sizeof(param));
@@ -905,7 +926,7 @@ public:
                        param.dstDevice = mem.device_pointer;
                        param.dstPitch = dst_pitch;
                        param.srcMemoryType = CU_MEMORYTYPE_HOST;
-                       param.srcHost = (void*)mem.data_pointer;
+                       param.srcHost = mem.host_pointer;
                        param.srcPitch = src_pitch;
                        param.WidthInBytes = param.srcPitch;
                        param.Height = mem.data_height;
@@ -914,8 +935,12 @@ public:
                }
                else {
                        /* 1D texture, using linear memory. */
-                       generic_alloc(mem);
-                       cuda_assert(cuMemcpyHtoD(mem.device_pointer, 
(void*)mem.data_pointer, size));
+                       cmem = generic_alloc(mem);
+                       if(!cmem) {
+                               return;
+                       }
+
+                       cuda_assert(cuMemcpyHtoD(mem.device_pointer, 
mem.host_pointer, size));
                }
 
                if(!has_fermi_limits) {
@@ -932,7 +957,7 @@ public:
                        CUDA_RESOURCE_DESC resDesc;
                        memset(&resDesc, 0, sizeof(resDesc));
 
-                       if(mem.data_depth > 1) {
+                       if(array_3d) {
                                resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
                                resDesc.res.array.hArray = array_3d;
                                resDesc.flags = 0;
@@ -962,13 +987,7 @@ public:
                        texDesc.filterMode = filter_mode;
                        texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
 
-                       CUtexObject tex = 0;
-                       cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, 
NULL));
-
-                       /* Safety check */
-                       if((uint)tex > UINT_MAX) {
-                               assert(0);
-                       }
+                       cuda_assert(cuTexObjectCreate(&cmem->texobject, 
&resDesc, &texDesc, NULL));
 
                        /* Resize once */
                        if(flat_slot >= texture_info.size()) {
@@ -979,20 +998,18 @@ public:
 
                        /* Set Mapping and tag that we need to (re-)upload to 
device */
                        TextureInfo& info = texture_info[flat_slot];
-                       info.data = (uint64_t)tex;
+                       info.data = (uint64_t)cmem->texobject;
                        info.cl_buffer = 0;
                        info.interpolation = mem.interpolation;
                        info.extension = mem.extension;
                        info.width = mem.data_width;
                        info.height = mem.data_height;
                        info.depth = mem.data_depth;
-
-                       tex_bindless_map[mem.device_pointer] = tex;
                        need_texture_info = true;
                }
                else {
                        /* Fermi, fixed texture slots. */
-                       if(mem.data_depth > 1) {
+                       if(array_3d) {
                                cuda_assert(cuTexRefSetArray(texref, array_3d, 
CU_TRSA_OVERRIDE_FORMAT));
                        }
                        else if(mem.data_height > 1) {
@@ -1017,38 +1034,27 @@ public:
                                cuda_assert(cuTexRefSetAddressMode(texre

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to