[Bf-blender-cvs] [84645df] cycles_kernel_split: Cycles kernel split: Cleanup of work stealing module

Sergey Sharybin Fri, 08 May 2015 07:22:38 -0700

Commit: 84645dff808c177dbd197c2e80ec95a4c2347d0f
Author: Sergey Sharybin
Date:   Fri May 8 19:21:09 2015 +0500
Branches: cycles_kernel_split
https://developer.blender.org/rB84645dff808c177dbd197c2e80ec95a4c2347d0f


Cycles kernel split: Cleanup of work stealing module

===================================================================

M       intern/cycles/kernel/kernel_work_stealing.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 4833fac..9b83d97 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -27,113 +27,163 @@
 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
 #endif
 
-unsigned int get_group_id_with_ray_index(unsigned int ray_index,
-                                         unsigned int tile_dim_x,
-                                         unsigned int tile_dim_y,
-                                         unsigned int parallel_samples,
-                                         int dim)
+uint get_group_id_with_ray_index(uint ray_index,
+                                 uint tile_dim_x,
+                                 uint tile_dim_y,
+                                 uint parallel_samples,
+                                 int dim)
 {
-       unsigned int retval;
        if(dim == 0) {
-               unsigned int x_span = ray_index % (tile_dim_x * parallel_samples);
-               retval = x_span / get_local_size(0);
+               uint x_span = ray_index % (tile_dim_x * parallel_samples);
+               return x_span / get_local_size(0);
        }
-       else if(dim == 1) {
-               unsigned int y_span = ray_index / (tile_dim_x * parallel_samples);
-               retval = y_span / get_local_size(1);
+       else /*if(dim == 1)*/ {
+               kernel_assert(dim == 1);
+               uint y_span = ray_index / (tile_dim_x * parallel_samples);
+               return y_span / get_local_size(1);
        }
-       return retval;
 }
 
-unsigned int get_total_work(unsigned int tile_dim_x,
-                            unsigned int tile_dim_y,
-                            unsigned int grp_idx,
-                            unsigned int grp_idy,
-                            unsigned int num_samples)
+uint get_total_work(uint tile_dim_x,
+                    uint tile_dim_y,
+                    uint grp_idx,
+                    uint grp_idy,
+                    uint num_samples)
 {
-       unsigned int threads_within_tile_border_x;
-       unsigned int threads_within_tile_border_y;
-
-       threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
-       threads_within_tile_border_y = (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) : get_local_size(1);
-
-       threads_within_tile_border_x = (threads_within_tile_border_x == 0) ? get_local_size(0) : threads_within_tile_border_x;
-       threads_within_tile_border_y = (threads_within_tile_border_y == 0) ? get_local_size(1) : threads_within_tile_border_y;
-
-       return (threads_within_tile_border_x * threads_within_tile_border_y * num_samples);
+       uint threads_within_tile_border_x =
+               (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+                                                    : get_local_size(0);
+       uint threads_within_tile_border_y =
+               (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+                                                    : get_local_size(1);
+
+       threads_within_tile_border_x =
+               (threads_within_tile_border_x == 0) ? get_local_size(0)
+                                                   : threads_within_tile_border_x;
+       threads_within_tile_border_y =
+               (threads_within_tile_border_y == 0) ? get_local_size(1)
+                                                   : threads_within_tile_border_y;
+
+       return threads_within_tile_border_x *
+              threads_within_tile_border_y *
+              num_samples;
 }
 
 /* Returns 0 in case there is no next work available */
 /* Returns 1 in case work assigned is valid */
-int get_next_work(ccl_global unsigned int *work_pool,
-                  ccl_private unsigned int *my_work,
-                  unsigned int tile_dim_x,
-                  unsigned int tile_dim_y,
-                  unsigned int num_samples,
-                  unsigned int parallel_samples,
-                  unsigned int ray_index)
+int get_next_work(ccl_global uint *work_pool,
+                  ccl_private uint *my_work,
+                  uint tile_dim_x,
+                  uint tile_dim_y,
+                  uint num_samples,
+                  uint parallel_samples,
+                  uint ray_index)
 {
-               unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
-               unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
-               unsigned int total_work = get_total_work(tile_dim_x, tile_dim_y, grp_idx, grp_idy, num_samples);
-               unsigned int group_index = grp_idy * get_num_groups(0) + grp_idx;
-               *my_work = atomic_inc(&work_pool[group_index]);
-               return = (*my_work < total_work) ? 1 : 0;
+       uint grp_idx = get_group_id_with_ray_index(ray_index,
+                                                  tile_dim_x,
+                                                  tile_dim_y,
+                                                  parallel_samples,
+                                                  0);
+       uint grp_idy = get_group_id_with_ray_index(ray_index,
+                                                  tile_dim_x,
+                                                  tile_dim_y,
+                                                  parallel_samples,
+                                                  1);
+       uint total_work = get_total_work(tile_dim_x,
+                                        tile_dim_y,
+                                        grp_idx,
+                                        grp_idy,
+                                        num_samples);
+       uint group_index = grp_idy * get_num_groups(0) + grp_idx;
+       *my_work = atomic_inc(&work_pool[group_index]);
+       return (*my_work < total_work) ? 1 : 0;
 }
 
-/* This function assumes that the passed my_work is valid */
-/* Decode sample number w.r.t. assigned my_work */
-unsigned int get_my_sample(unsigned int my_work,
-                           unsigned int tile_dim_x,
-                           unsigned int tile_dim_y,
-                           unsigned int parallel_samples,
-                           unsigned int ray_index)
+/* This function assumes that the passed my_work is valid. */
+/* Decode sample number w.r.t. assigned my_work. */
+uint get_my_sample(uint my_work,
+                   uint tile_dim_x,
+                   uint tile_dim_y,
+                   uint parallel_samples,
+                   uint ray_index)
 {
-       unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
-       unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
-       unsigned int threads_within_tile_border_x;
-       unsigned int threads_within_tile_border_y;
-
-       threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
-       threads_within_tile_border_y = (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) : get_local_size(1);
-
-       threads_within_tile_border_x = (threads_within_tile_border_x == 0) ? get_local_size(0) : threads_within_tile_border_x;
-       threads_within_tile_border_y = (threads_within_tile_border_y == 0) ? get_local_size(1) : threads_within_tile_border_y;
-
-       return (my_work / (threads_within_tile_border_x * threads_within_tile_border_y));
+       uint grp_idx = get_group_id_with_ray_index(ray_index,
+                                                  tile_dim_x,
+                                                  tile_dim_y,
+                                                  parallel_samples,
+                                                  0);
+       uint grp_idy = get_group_id_with_ray_index(ray_index,
+                                                  tile_dim_x,
+                                                  tile_dim_y,
+                                                  parallel_samples,
+                                                  1);
+       uint threads_within_tile_border_x =
+               (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+                                                    : get_local_size(0);
+       uint threads_within_tile_border_y =
+               (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+                                                    : get_local_size(1);
+
+       threads_within_tile_border_x =
+               (threads_within_tile_border_x == 0) ? get_local_size(0)
+                                                   : threads_within_tile_border_x;
+       threads_within_tile_border_y =
+               (threads_within_tile_border_y == 0) ? get_local_size(1)
+                                                   : threads_within_tile_border_y;
+
+       return my_work /
+              (threads_within_tile_border_x * threads_within_tile_border_y);
 }
 
-/* Decode pixel and tile position w.r.t. assigned my_work */
-void get_pixel_tile_position(ccl_private unsigned int *pixel_x,
-                             ccl_private unsigned int *pixel_y,
-                             ccl_private unsigned int *tile_x,
-                             ccl_private unsigned int *tile_y,
-                             unsigned int my_work,
-                             unsigned int tile_dim_x,
-                             unsigned int tile_dim_y,
-                             unsigned int tile_offset_x,
-                             unsigned int tile_offset_y,
-                             unsigned int parallel_samples,
-                             unsigned int ray_index)
+/* Decode pixel and tile position w.r.t. assigned my_work. */
+void get_pixel_tile_position(ccl_private uint *pixel_x,
+                             ccl_private uint *pixel_y,
+                             ccl_private uint *tile_x,
+                             ccl_private uint *tile_y,
+                             uint my_work,
+                             uint tile_dim_x,
+                             uint tile_dim_y,
+                             uint tile_offset_x,
+                             uint tile_offset_y,
+                             uint parallel_samples,
+                             uint ray_index)
 {
-       unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
-       unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
-       unsigned int threads_within_tile_border_x;
-       unsigned int threads_within_tile_border_y;
-
-       threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
-       thre

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [84645df] cycles_kernel_split: Cycles kernel split: Cleanup of work stealing module

Reply via email to