Commit: bc8e3a3d868d19719f67ec2e5ed2d0b516a98312 Author: Mai Lavelle Date: Tue Oct 18 11:30:25 2016 +0200 Branches: cycles_split_kernel https://developer.blender.org/rBbc8e3a3d868d19719f67ec2e5ed2d0b516a98312
Cycles: Replace use of cl_mem with device_memory in split kernel device Working towards using only device agnostic types and methods in the host. =================================================================== M intern/cycles/device/device.h M intern/cycles/device/device_memory.h M intern/cycles/device/opencl/opencl.h M intern/cycles/device/opencl/opencl_split.cpp =================================================================== diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 77dc1fa..014e5fc 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -226,6 +226,25 @@ public: virtual void mem_zero(device_memory& mem) = 0; virtual void mem_free(device_memory& mem) = 0; + /* setup and allocate a device_memory object for use on device only (no host side buffer)*/ + void mem_alloc(device_memory& mem, size_t size, MemoryType type = MEM_READ_WRITE) + { + mem.data_type = device_type_traits<uchar>::data_type; + mem.data_elements = 1; + mem.data_pointer = 0; + mem.data_size = size; + mem.device_size = 0; + mem.data_width = size; + mem.data_height = 1; + mem.data_depth = 1; + + assert(mem.data_elements > 0); + + mem.device_pointer = 0; + + mem_alloc(mem, type); + } + /* constant memory */ virtual void const_copy_to(const char *name, void *host, size_t size) = 0; diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 5b5b4dc..0093c93 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -180,10 +180,20 @@ public: /* device pointer */ device_ptr device_pointer; -protected: - device_memory() {} + device_memory() { + data_type = device_type_traits<float>::data_type; + data_elements = device_type_traits<float>::num_elements; + data_pointer = 0; + data_size = 0; + device_size = 0; + data_width = 0; + data_height = 0; + data_depth = 0; + device_pointer = 0; + } virtual ~device_memory() { assert(!device_pointer); } +protected: /* no copying */ device_memory(const device_memory&); device_memory& operator = (const device_memory&); diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 30a35ac..83603b2 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -265,6 +265,7 @@ public: vector<OpenCLProgram*> &programs) = 0; void mem_alloc(device_memory& mem, MemoryType type); + using Device::mem_alloc; void mem_copy_to(device_memory& mem); void mem_copy_from(device_memory& mem, int y, int w, int h, int elem); void mem_zero(device_memory& mem); @@ -321,16 +322,39 @@ protected: class ArgumentWrapper { public: - ArgumentWrapper() : size(0), pointer(NULL) {} - template <typename T> + ArgumentWrapper() : size(0), pointer(NULL) + { + } + + ArgumentWrapper(device_memory& argument) : size(sizeof(void*)), + pointer((void*)(&argument.device_pointer)) + { + } + + template<typename T> + ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)), + pointer((void*)(&argument.device_pointer)) + { + } + + template<typename T> ArgumentWrapper(T& argument) : size(sizeof(argument)), - pointer(&argument) { } + pointer(&argument) + { + } + ArgumentWrapper(int argument) : size(sizeof(int)), int_value(argument), - pointer(&int_value) { } + pointer(&int_value) + { + } + ArgumentWrapper(float argument) : size(sizeof(float)), float_value(argument), - pointer(&float_value) { } + pointer(&float_value) + { + } + size_t size; int int_value; float float_value; diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index fc80173..e1e1f54 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -107,20 +107,13 @@ public: * kernel will be available to another kernel via this global * memory. */ - cl_mem kgbuffer; /* KernelGlobals buffer. */ - - cl_mem split_data; - - /* Global state array that tracks ray state. */ - cl_mem ray_state; - - /* Queue */ - cl_mem Queue_index; /* Array of size num_queues * sizeof(int); - * Tracks the size of each queue. - */ + device_memory kgbuffer; + device_memory split_data; + device_vector<uchar> ray_state; + device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */ /* Flag to make sceneintersect and lampemission kernel use queues. */ - cl_mem use_queues_flag; + device_memory use_queues_flag; /* Amount of memory in output buffer associated with one pixel/thread. */ size_t per_thread_output_buffer_size; @@ -128,17 +121,12 @@ public: /* Total allocatable available device memory. */ size_t total_allocatable_memory; - /* host version of ray_state; Used in checking host path-iteration - * termination. - */ - char *hostRayStateArray; - /* Number of path-iterations to be done in one shot. */ unsigned int PathIteration_times; #ifdef __WORK_STEALING__ /* Work pool with respect to each work group. */ - cl_mem work_pool_wgs; + device_memory work_pool_wgs; /* Denotes the maximum work groups possible w.r.t. current tile size. */ unsigned int max_work_groups; @@ -155,20 +143,9 @@ public: { background = background_; - /* Initialize cl_mem variables. */ - kgbuffer = NULL; - split_data = NULL; - ray_state = NULL; - - /* Queue. */ - Queue_index = NULL; - use_queues_flag = NULL; - per_thread_output_buffer_size = 0; - hostRayStateArray = NULL; PathIteration_times = PATH_ITER_INC_FACTOR; #ifdef __WORK_STEALING__ - work_pool_wgs = NULL; max_work_groups = 0; #endif current_max_closure = -1; @@ -294,18 +271,14 @@ public: program_sum_all_radiance.release(); /* Release global memory */ - release_mem_object_safe(kgbuffer); - release_mem_object_safe(split_data); - release_mem_object_safe(ray_state); - release_mem_object_safe(use_queues_flag); - release_mem_object_safe(Queue_index); + mem_free(kgbuffer); + mem_free(split_data); + mem_free(ray_state); + mem_free(use_queues_flag); + mem_free(queue_index); #ifdef __WORK_STEALING__ - release_mem_object_safe(work_pool_wgs); + mem_free(work_pool_wgs); #endif - - if(hostRayStateArray != NULL) { - free(hostRayStateArray); - } } void path_trace(DeviceTask *task, @@ -313,9 +286,9 @@ public: int2 max_render_feasible_tile_size) { /* cast arguments to cl types */ - cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); - cl_mem d_buffer = CL_MEM_PTR(rtile.buffer); - cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state); + device_memory& d_data = *const_mem_map["__data"]; + device_ptr d_buffer = rtile.buffer; + device_ptr d_rng_state = rtile.rng_state; cl_int d_x = rtile.x; cl_int d_y = rtile.y; cl_int d_w = rtile.w; @@ -383,26 +356,25 @@ public: max_global_size[1] = (((tile_y - 1) / local_size[1]) + 1) * local_size[1]; max_work_groups = (max_global_size[0] * max_global_size[1]) / (local_size[0] * local_size[1]); + /* Allocate work_pool_wgs memory. */ - work_pool_wgs = mem_alloc(max_work_groups * sizeof(unsigned int)); + mem_alloc(work_pool_wgs, max_work_groups * sizeof(unsigned int)); #endif /* __WORK_STEALING__ */ - /* Allocate queue_index memory only once. */ - Queue_index = mem_alloc(NUM_QUEUES * sizeof(int)); - use_queues_flag = mem_alloc(sizeof(char)); - kgbuffer = mem_alloc(get_KernelGlobals_size()); - ray_state = mem_alloc(num_global_elements * sizeof(char)); - split_data = mem_alloc(split_data_buffer_size(num_global_elements, - current_max_closure, - per_thread_output_buffer_size)); - - hostRayStateArray = (char *)calloc(num_global_elements, sizeof(char)); - assert(hostRayStateArray != NULL && "Can't create hostRayStateArray memory"); + mem_alloc(queue_index, NUM_QUEUES * sizeof(int)); + mem_alloc(use_queues_flag, sizeof(char)); + mem_alloc(kgbuffer, get_KernelGlobals_size()); + + ray_state.resize(num_global_elements); + mem_alloc(ray_state, MEM_READ_WRITE); + + mem_alloc(split_data, split_data_buffer_size(num_global_elements, + current_max_closure, + per_thread_output_buffer_size)); } cl_int dQueue_size = global_size[0] * global_size[1]; - //printf("kernel_set_args data_init\n"); cl_uint start_arg_index = kernel_set_args(program_data_init(), 0, @@ -433,7 +405,7 @@ public: rtile.rng_state_offset_x, rtile.rng_state_offset_y, rtile.buffer_rng_state_stride, - Queue_index, + queue_index, dQueue_size, use_queues_flag, #ifdef __WORK_STEALING__ @@ -463,7 +435,6 @@ public: #define GLUE(a, b) a ## b #define ENQUEUE_SPLIT_KERNEL(kernelName, globalSize, localSize) \ { \ - /*printf("enqueueing " #kernelName "\n");*/ \ ciErr = clEnqueueNDRangeKernel(cqCommandQueue, \ GLUE(program_, \ kernelName)(), \ @@ -517,20 +488,8 @@ public: } } - /* Read ray-state into Host memory to decide if we should exit - * path-iteration in host. - */ - //printf("enqueue read\n"); - ciErr = clEnqueueReadBuffer(cqCommandQueue, - ray_state, - CL_TRUE, - 0, - global_size[0] * global_size[1] * sizeof(char), - hostRayStateArray, - 0, - NULL, - NULL); - assert(ciErr == CL_SUCCESS); + /* Decide if we should exit path-iteration in host. */ + mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1); activeRaysAvailable = false; @@ -538,7 +497,7 @@ public: rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) { - if(int8_t(hostRayStateArra @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs