[Bf-blender-cvs] [63a604e] cycles_split_kernel: Cycles: Implement enqueue_split_kernel_data_init for OpenCL devices

Mai Lavelle Tue, 18 Oct 2016 09:14:32 -0700

Commit: 63a604e0cf8a43f37c2e5444d71102d345350308
Author: Mai Lavelle
Date:   Tue Oct 18 18:09:42 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB63a604e0cf8a43f37c2e5444d71102d345350308


Cycles: Implement enqueue_split_kernel_data_init for OpenCL devices

The `enqueue_split_kernel_data_init()` function will allow each device type to
set up the various data buffers how ever they need to without concerning the
rest of the split kernel logic.

===================================================================

M       intern/cycles/device/device.h
M       intern/cycles/device/opencl/opencl_split.cpp
M       intern/cycles/render/buffers.cpp
M       intern/cycles/render/buffers.h

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index f79678d..31dbb90 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -296,7 +296,20 @@ public:
        { return true; }
 
        /* split kernel */
-       virtual bool enqueue_split_kernel_data_init()
+       virtual bool enqueue_split_kernel_data_init(const KernelDimensions& 
/*dim*/,
+                                                   RenderTile& /*rtile*/,
+                                                   int /*num_global_elements*/,
+                                                   int 
/*num_parallel_samples*/,
+                                                   device_memory& 
/*kernel_globals*/,
+                                                   device_memory& 
/*kernel_data*/,
+                                                   device_memory& 
/*split_data*/,
+                                                   device_memory& 
/*ray_state*/,
+                                                   device_memory& 
/*queue_index*/,
+                                                   device_memory& 
/*use_queues_flag*/
+#ifdef __WORK_STEALING__
+                                                   , device_memory& 
/*work_pool_wgs*/
+#endif
+                                                   )
        {
                assert(!"not implemented for this device");
                return false;
diff --git a/intern/cycles/device/opencl/opencl_split.cpp 
b/intern/cycles/device/opencl/opencl_split.cpp
index 7d8dd95..6efeb70 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -29,62 +29,6 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* TODO(sergey): This is to keep tile split on OpenCL level working
- * for now, since without this view-port render does not work as it
- * should.
- *
- * Ideally it'll be done on the higher level, but we need to get ready
- * for merge rather soon, so let's keep split logic private here in
- * the file.
- */
-class SplitRenderTile : public RenderTile {
-public:
-       SplitRenderTile()
-               : RenderTile(),
-                 buffer_offset_x(0),
-                 buffer_offset_y(0),
-                 rng_state_offset_x(0),
-                 rng_state_offset_y(0),
-                 buffer_rng_state_stride(0) {}
-
-       explicit SplitRenderTile(RenderTile& tile)
-               : RenderTile(),
-                 buffer_offset_x(0),
-                 buffer_offset_y(0),
-                 rng_state_offset_x(0),
-                 rng_state_offset_y(0),
-                 buffer_rng_state_stride(0)
-       {
-               x = tile.x;
-               y = tile.y;
-               w = tile.w;
-               h = tile.h;
-               start_sample = tile.start_sample;
-               num_samples = tile.num_samples;
-               sample = tile.sample;
-               resolution = tile.resolution;
-               offset = tile.offset;
-               stride = tile.stride;
-               buffer = tile.buffer;
-               rng_state = tile.rng_state;
-               buffers = tile.buffers;
-       }
-
-       /* Split kernel is device global memory constrained;
-        * hence split kernel cant render big tile size's in
-        * one go. If the user sets a big tile size (big tile size
-        * is a term relative to the available device global memory),
-        * we split the tile further and then call path_trace on
-        * each of those split tiles. The following variables declared,
-        * assist in achieving that purpose
-        */
-       int buffer_offset_x;
-       int buffer_offset_y;
-       int rng_state_offset_x;
-       int rng_state_offset_y;
-       int buffer_rng_state_stride;
-};
-
 class OpenCLSplitKernelFunction : public SplitKernelFunction {
 public:
        OpenCLDeviceBase* device;
@@ -346,20 +290,101 @@ public:
 #endif
        }
 
+       virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
+                                                   RenderTile& rtile,
+                                                   int num_global_elements,
+                                                   int num_parallel_samples,
+                                                   device_memory& 
kernel_globals,
+                                                   device_memory& kernel_data,
+                                                   device_memory& split_data,
+                                                   device_memory& ray_state,
+                                                   device_memory& queue_index,
+                                                   device_memory& 
use_queues_flag,
+#ifdef __WORK_STEALING__
+                                                   device_memory& work_pool_wgs
+#endif
+                                                   )
+       {
+               cl_int dQueue_size = dim.global_size[0] * dim.global_size[1];
+
+               /* Set the range of samples to be processed for every ray in
+                * path-regeneration logic.
+                */
+               cl_int start_sample = rtile.start_sample;
+               cl_int end_sample = rtile.start_sample + rtile.num_samples;
+
+               cl_uint start_arg_index =
+                       kernel_set_args(program_data_init(),
+                                       0,
+                                       kernel_globals,
+                                       kernel_data,
+                                                       split_data,
+                                       num_global_elements,
+                                                       ray_state,
+                                       rtile.rng_state);
+
+/* TODO(sergey): Avoid map lookup here. */
+#define KERNEL_TEX(type, ttype, name) \
+       set_kernel_arg_mem(program_data_init(), &start_arg_index, #name);
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+
+               start_arg_index +=
+                       kernel_set_args(program_data_init(),
+                                       start_arg_index,
+                                       start_sample,
+                                       end_sample,
+                                       rtile.x,
+                                       rtile.y,
+                                       rtile.w,
+                                       rtile.h,
+                                       rtile.offset,
+                                       rtile.stride,
+                                       rtile.rng_state_offset_x,
+                                       rtile.rng_state_offset_y,
+                                       rtile.buffer_rng_state_stride,
+                                       queue_index,
+                                       dQueue_size,
+                                       use_queues_flag,
+#ifdef __WORK_STEALING__
+                                       work_pool_wgs,
+                                       rtile.num_samples,
+#endif
+                                       num_parallel_samples,
+                                       rtile.buffer_offset_x,
+                                       rtile.buffer_offset_y,
+                                       rtile.buffer_rng_state_stride,
+                                                       rtile.buffer);
+
+               /* Enqueue ckPathTraceKernel_data_init kernel. */
+               ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
+                                              program_data_init(),
+                                              2,
+                                              NULL,
+                                              dim.global_size,
+                                              dim.local_size,
+                                              0,
+                                              NULL,
+                                              NULL);
+
+               opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
+
+               if(ciErr != CL_SUCCESS) {
+                       string message = string_printf("OpenCL error: %s in 
clEnqueueNDRangeKernel()",
+                                                      clewErrorString(ciErr));
+                       opencl_error(message);
+                       return false;
+               }
+
+               return true;
+       }
+
        void path_trace(DeviceTask *task,
-                       SplitRenderTile& rtile,
+                       RenderTile& rtile,
                        int2 max_render_feasible_tile_size)
        {
                /* cast arguments to cl types */
                device_memory& d_data = *const_mem_map["__data"];
-               device_ptr d_buffer = rtile.buffer;
-               device_ptr d_rng_state = rtile.rng_state;
-               cl_int d_x = rtile.x;
-               cl_int d_y = rtile.y;
-               cl_int d_w = rtile.w;
-               cl_int d_h = rtile.h;
-               cl_int d_offset = rtile.offset;
-               cl_int d_stride = rtile.stride;
 
                /* Make sure that set render feasible tile size is a multiple 
of local
                 * work size dimensions.
@@ -371,12 +396,8 @@ public:
                size_t local_size[2] = {SPLIT_KERNEL_LOCAL_SIZE_X,
                                        SPLIT_KERNEL_LOCAL_SIZE_Y};
 
-               /* Set the range of samples to be processed for every ray in
-                * path-regeneration logic.
-                */
-               cl_int start_sample = rtile.start_sample;
-               cl_int end_sample = rtile.start_sample + rtile.num_samples;
-               cl_int num_samples = rtile.num_samples;
+               int d_w = rtile.w;
+               int d_h = rtile.h;
 
 #ifdef __WORK_STEALING__
                global_size[0] = (((d_w - 1) / local_size[0]) + 1) * 
local_size[0];
@@ -438,68 +459,21 @@ public:
                                                                     
per_thread_output_buffer_size));
                }
 
-               cl_int dQueue_size = global_size[0] * global_size[1];
-
-               cl_uint start_arg_index =
-                       kernel_set_args(program_data_init(),
-                                       0,
-                                       kgbuffer,
-                                       d_data,
-                                                       split_data,
-                                       num_global_elements,
-                                                       ray_state,
-                                       d_rng_state);
-
-/* TODO(sergey): Avoid map lookup here. */
-#define KERNEL_TEX(type, ttype, name) \
-       set_kernel_arg_mem(program_data_init(), &start_arg_index, #name);
-#include "kernel_textures.h"
-#undef KERNEL_TEX
-
-               start_arg_index +=
-                       kernel_set_args(program_data_init(),
-                                       start_arg_index,
-                                       start_sample,
-                                       end_sample,
-                                       d_x,
-                                       d_y,
-                                       d_w,
-                                       d_h,
-                                       d_offset,
-                                       d_stride,
-                                       rtile.rng_state_offset_x,
-                                       rtile.rng_state_offset_y,
-                                       rtile.buffer_rng_state_stride,
-                                       queue_index,
-                                       dQueue_size,
-                                       use_queues_flag,
+               
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+                                              rtile,
+                                              num_global_elements,
+                                              num_parallel_samples,
+                                              kgbuffer,
+                                              d_data,
+                                              split_data,
+                                              ray_state,
+                                              queue_index,
+                                              use_queues_flag,
 #ifdef __WORK_STEALING__
-                                       work_pool_wgs,
-                                       num_samples,
+                                              work_pool_wgs
 #endif
-                                       num_parallel_samples,
-                                       rtile.buffer_offset_x,
-                                       rtile.buffer_offset_y,
-                                       rtile.buffer_rng_state_stride,
-                                                       d_buffer);
-
-               /* Enqueue ckPathTraceKernel_data_init kernel. */
-               ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
-                                              program_data_init(),
-                                              2,
-                                              NULL,
-                                        

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [63a604e] cycles_split_kernel: Cycles: Implement enqueue_split_kernel_data_init for OpenCL devices

Reply via email to