[Bf-blender-cvs] [f30d722461] cycles_split_kernel: Cycles: Clean up variables in host side split kernel logic

Mai Lavelle Thu, 05 Jan 2017 19:03:51 -0800

Commit: f30d722461f6b7ac446c982b47ce9ca1f9222bb8
Author: Mai Lavelle
Date:   Thu Dec 22 00:53:35 2016 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rBf30d722461f6b7ac446c982b47ce9ca1f9222bb8


Cycles: Clean up variables in host side split kernel logic

Was a bit messy, lots of duplication, hard to tell what was what.

===================================================================

M       intern/cycles/device/device_split_kernel.cpp

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp 
b/intern/cycles/device/device_split_kernel.cpp
index dcbb6821d6..0d968d450e 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -102,9 +102,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
        tile.buffer_rng_state_stride = tile.stride;
        tile.stride = tile.w;
 
-       size_t global_size[2];
        size_t local_size[2];
-
        {
                int2 lsize = device->split_kernel_local_size();
                local_size[0] = lsize[0];
@@ -143,40 +141,38 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
                                              tile.buffers->params.height);
        }
 
-       int d_w = tile.w;
-       int d_h = tile.h;
-
+       /* set global_size and num_parallel_samples */
+       size_t global_size[2];
+       unsigned int num_parallel_samples;
+       {
 #ifdef __WORK_STEALING__
-       global_size[0] = (((d_w - 1) / local_size[0]) + 1) * local_size[0];
-       global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
-       unsigned int num_parallel_samples = 1;
+               global_size[0] = round_up(tile.w, local_size[0]);
+               global_size[1] = round_up(tile.h, local_size[1]);
+               num_parallel_samples = 1;
 #else
-       global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
-       unsigned int num_threads = max_render_feasible_tile_size.x *
-                                  max_render_feasible_tile_size.y;
-       unsigned int num_tile_columns_possible = num_threads / global_size[1];
-       /* Estimate number of parallel samples that can be
-        * processed in parallel.
-        */
-       unsigned int num_parallel_samples = min(num_tile_columns_possible / d_w,
-                                               tile.num_samples);
-       /* Wavefront size in AMD is 64.
-        * TODO(sergey): What about other platforms?
-        */
-       if(num_parallel_samples >= 64) {
-               /* TODO(sergey): Could use generic round-up here. */
-               num_parallel_samples = (num_parallel_samples / 64) * 64;
-       }
-       assert(num_parallel_samples != 0);
+               global_size[1] = round_up(tile.h, local_size[1]);
+               unsigned int num_threads = max_render_feasible_tile_size.x * 
max_render_feasible_tile_size.y;
+               unsigned int num_tile_columns_possible = num_threads / 
global_size[1];
+               /* Estimate number of parallel samples that can be
+                * processed in parallel.
+                */
+               num_parallel_samples = min(num_tile_columns_possible / tile.w, 
tile.num_samples);
+               /* Wavefront size in AMD is 64.
+                * TODO(sergey): What about other platforms?
+                */
+               if(num_parallel_samples >= 64) {
+                       /* TODO(sergey): Could use generic round-up here. */
+                       num_parallel_samples = (num_parallel_samples / 64) * 64;
+               }
+               assert(num_parallel_samples != 0);
 
-       global_size[0] = d_w * num_parallel_samples;
+               global_size[0] = tile.w * num_parallel_samples;
 #endif  /* __WORK_STEALING__ */
 
-       assert(global_size[0] * global_size[1] <=
-              max_render_feasible_tile_size.x * 
max_render_feasible_tile_size.y);
+               assert(global_size[0] * global_size[1] <= 
max_render_feasible_tile_size.x * max_render_feasible_tile_size.y);
+       }
 
-       int num_global_elements = max_render_feasible_tile_size.x *
-                                 max_render_feasible_tile_size.y;
+       int num_global_elements = max_render_feasible_tile_size.x * 
max_render_feasible_tile_size.y;
 
        /* Allocate all required global memory once. */
        if(first_tile) {
@@ -185,10 +181,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 #ifdef __WORK_STEALING__
                /* Calculate max groups */
                size_t max_global_size[2];
-               size_t tile_x = max_render_feasible_tile_size.x;
-               size_t tile_y = max_render_feasible_tile_size.y;
-               max_global_size[0] = (((tile_x - 1) / local_size[0]) + 1) * 
local_size[0];
-               max_global_size[1] = (((tile_y - 1) / local_size[1]) + 1) * 
local_size[1];
+               max_global_size[0] = round_up(max_render_feasible_tile_size.x, 
local_size[0]);
+               max_global_size[1] = round_up(max_render_feasible_tile_size.y, 
local_size[1]);
 
                /* Denotes the maximum work groups possible w.r.t. current tile 
size. */
                unsigned int max_work_groups = (max_global_size[0] * 
max_global_size[1]) /
@@ -316,12 +310,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 
                size_t sum_all_radiance_local_size[2] = {16, 16};
                size_t sum_all_radiance_global_size[2];
-               sum_all_radiance_global_size[0] =
-                       (((d_w - 1) / sum_all_radiance_local_size[0]) + 1) *
-                       sum_all_radiance_local_size[0];
-               sum_all_radiance_global_size[1] =
-                       (((d_h - 1) / sum_all_radiance_local_size[1]) + 1) *
-                       sum_all_radiance_local_size[1];
+               sum_all_radiance_global_size[0] = round_up(tile.w, 
sum_all_radiance_local_size[0]);
+               sum_all_radiance_global_size[1] = round_up(tile.h, 
sum_all_radiance_local_size[1]);
 
                ENQUEUE_SPLIT_KERNEL(sum_all_radiance,
                                     sum_all_radiance_global_size,

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [f30d722461] cycles_split_kernel: Cycles: Clean up variables in host side split kernel logic

Reply via email to