Commit: 63a604e0cf8a43f37c2e5444d71102d345350308 Author: Mai Lavelle Date: Tue Oct 18 18:09:42 2016 +0200 Branches: cycles_split_kernel https://developer.blender.org/rB63a604e0cf8a43f37c2e5444d71102d345350308
Cycles: Implement enqueue_split_kernel_data_init for OpenCL devices The `enqueue_split_kernel_data_init()` function will allow each device type to set up the various data buffers how ever they need to without concerning the rest of the split kernel logic. =================================================================== M intern/cycles/device/device.h M intern/cycles/device/opencl/opencl_split.cpp M intern/cycles/render/buffers.cpp M intern/cycles/render/buffers.h =================================================================== diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index f79678d..31dbb90 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -296,7 +296,20 @@ public: { return true; } /* split kernel */ - virtual bool enqueue_split_kernel_data_init() + virtual bool enqueue_split_kernel_data_init(const KernelDimensions& /*dim*/, + RenderTile& /*rtile*/, + int /*num_global_elements*/, + int /*num_parallel_samples*/, + device_memory& /*kernel_globals*/, + device_memory& /*kernel_data*/, + device_memory& /*split_data*/, + device_memory& /*ray_state*/, + device_memory& /*queue_index*/, + device_memory& /*use_queues_flag*/ +#ifdef __WORK_STEALING__ + , device_memory& /*work_pool_wgs*/ +#endif + ) { assert(!"not implemented for this device"); return false; diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 7d8dd95..6efeb70 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -29,62 +29,6 @@ CCL_NAMESPACE_BEGIN -/* TODO(sergey): This is to keep tile split on OpenCL level working - * for now, since without this view-port render does not work as it - * should. - * - * Ideally it'll be done on the higher level, but we need to get ready - * for merge rather soon, so let's keep split logic private here in - * the file. - */ -class SplitRenderTile : public RenderTile { -public: - SplitRenderTile() - : RenderTile(), - buffer_offset_x(0), - buffer_offset_y(0), - rng_state_offset_x(0), - rng_state_offset_y(0), - buffer_rng_state_stride(0) {} - - explicit SplitRenderTile(RenderTile& tile) - : RenderTile(), - buffer_offset_x(0), - buffer_offset_y(0), - rng_state_offset_x(0), - rng_state_offset_y(0), - buffer_rng_state_stride(0) - { - x = tile.x; - y = tile.y; - w = tile.w; - h = tile.h; - start_sample = tile.start_sample; - num_samples = tile.num_samples; - sample = tile.sample; - resolution = tile.resolution; - offset = tile.offset; - stride = tile.stride; - buffer = tile.buffer; - rng_state = tile.rng_state; - buffers = tile.buffers; - } - - /* Split kernel is device global memory constrained; - * hence split kernel cant render big tile size's in - * one go. If the user sets a big tile size (big tile size - * is a term relative to the available device global memory), - * we split the tile further and then call path_trace on - * each of those split tiles. The following variables declared, - * assist in achieving that purpose - */ - int buffer_offset_x; - int buffer_offset_y; - int rng_state_offset_x; - int rng_state_offset_y; - int buffer_rng_state_stride; -}; - class OpenCLSplitKernelFunction : public SplitKernelFunction { public: OpenCLDeviceBase* device; @@ -346,20 +290,101 @@ public: #endif } + virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, + RenderTile& rtile, + int num_global_elements, + int num_parallel_samples, + device_memory& kernel_globals, + device_memory& kernel_data, + device_memory& split_data, + device_memory& ray_state, + device_memory& queue_index, + device_memory& use_queues_flag, +#ifdef __WORK_STEALING__ + device_memory& work_pool_wgs +#endif + ) + { + cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; + + /* Set the range of samples to be processed for every ray in + * path-regeneration logic. + */ + cl_int start_sample = rtile.start_sample; + cl_int end_sample = rtile.start_sample + rtile.num_samples; + + cl_uint start_arg_index = + kernel_set_args(program_data_init(), + 0, + kernel_globals, + kernel_data, + split_data, + num_global_elements, + ray_state, + rtile.rng_state); + +/* TODO(sergey): Avoid map lookup here. */ +#define KERNEL_TEX(type, ttype, name) \ + set_kernel_arg_mem(program_data_init(), &start_arg_index, #name); +#include "kernel_textures.h" +#undef KERNEL_TEX + + start_arg_index += + kernel_set_args(program_data_init(), + start_arg_index, + start_sample, + end_sample, + rtile.x, + rtile.y, + rtile.w, + rtile.h, + rtile.offset, + rtile.stride, + rtile.rng_state_offset_x, + rtile.rng_state_offset_y, + rtile.buffer_rng_state_stride, + queue_index, + dQueue_size, + use_queues_flag, +#ifdef __WORK_STEALING__ + work_pool_wgs, + rtile.num_samples, +#endif + num_parallel_samples, + rtile.buffer_offset_x, + rtile.buffer_offset_y, + rtile.buffer_rng_state_stride, + rtile.buffer); + + /* Enqueue ckPathTraceKernel_data_init kernel. */ + ciErr = clEnqueueNDRangeKernel(cqCommandQueue, + program_data_init(), + 2, + NULL, + dim.global_size, + dim.local_size, + 0, + NULL, + NULL); + + opencl_assert_err(ciErr, "clEnqueueNDRangeKernel"); + + if(ciErr != CL_SUCCESS) { + string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", + clewErrorString(ciErr)); + opencl_error(message); + return false; + } + + return true; + } + void path_trace(DeviceTask *task, - SplitRenderTile& rtile, + RenderTile& rtile, int2 max_render_feasible_tile_size) { /* cast arguments to cl types */ device_memory& d_data = *const_mem_map["__data"]; - device_ptr d_buffer = rtile.buffer; - device_ptr d_rng_state = rtile.rng_state; - cl_int d_x = rtile.x; - cl_int d_y = rtile.y; - cl_int d_w = rtile.w; - cl_int d_h = rtile.h; - cl_int d_offset = rtile.offset; - cl_int d_stride = rtile.stride; /* Make sure that set render feasible tile size is a multiple of local * work size dimensions. @@ -371,12 +396,8 @@ public: size_t local_size[2] = {SPLIT_KERNEL_LOCAL_SIZE_X, SPLIT_KERNEL_LOCAL_SIZE_Y}; - /* Set the range of samples to be processed for every ray in - * path-regeneration logic. - */ - cl_int start_sample = rtile.start_sample; - cl_int end_sample = rtile.start_sample + rtile.num_samples; - cl_int num_samples = rtile.num_samples; + int d_w = rtile.w; + int d_h = rtile.h; #ifdef __WORK_STEALING__ global_size[0] = (((d_w - 1) / local_size[0]) + 1) * local_size[0]; @@ -438,68 +459,21 @@ public: per_thread_output_buffer_size)); } - cl_int dQueue_size = global_size[0] * global_size[1]; - - cl_uint start_arg_index = - kernel_set_args(program_data_init(), - 0, - kgbuffer, - d_data, - split_data, - num_global_elements, - ray_state, - d_rng_state); - -/* TODO(sergey): Avoid map lookup here. */ -#define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(program_data_init(), &start_arg_index, #name); -#include "kernel_textures.h" -#undef KERNEL_TEX - - start_arg_index += - kernel_set_args(program_data_init(), - start_arg_index, - start_sample, - end_sample, - d_x, - d_y, - d_w, - d_h, - d_offset, - d_stride, - rtile.rng_state_offset_x, - rtile.rng_state_offset_y, - rtile.buffer_rng_state_stride, - queue_index, - dQueue_size, - use_queues_flag, + if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), + rtile, + num_global_elements, + num_parallel_samples, + kgbuffer, + d_data, + split_data, + ray_state, + queue_index, + use_queues_flag, #ifdef __WORK_STEALING__ - work_pool_wgs, - num_samples, + work_pool_wgs #endif - num_parallel_samples, - rtile.buffer_offset_x, - rtile.buffer_offset_y, - rtile.buffer_rng_state_stride, - d_buffer); - - /* Enqueue ckPathTraceKernel_data_init kernel. */ - ciErr = clEnqueueNDRangeKernel(cqCommandQueue, - program_data_init(), - 2, - NULL, - @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs