[Bf-blender-cvs] [365ad27] cycles_split_kernel: Cycles: Actually implement work item functions for CPU

Mai Lavelle Thu, 27 Oct 2016 08:49:38 -0700

Commit: 365ad278f6ceff5ba83d596cbd4e348dff5de76b
Author: Mai Lavelle
Date:   Thu Oct 27 17:36:52 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB365ad278f6ceff5ba83d596cbd4e348dff5de76b


Cycles: Actually implement work item functions for CPU

These functions were not actually implemented before. Unfortunately, implementing
them requires passing `kg` around to a lot more functions.

===================================================================

M       intern/cycles/kernel/closure/alloc.h
M       intern/cycles/kernel/closure/bssrdf.h
M       intern/cycles/kernel/kernel_compat_cpu.h
M       intern/cycles/kernel/kernel_compat_opencl.h
M       intern/cycles/kernel/kernel_globals.h
M       intern/cycles/kernel/kernel_queues.h
M       intern/cycles/kernel/kernel_subsurface.h
M       intern/cycles/kernel/kernel_work_stealing.h
M       intern/cycles/kernel/split/kernel_background_buffer_update.h
M       intern/cycles/kernel/split/kernel_data_init.h
M       intern/cycles/kernel/split/kernel_direct_lighting.h
M       intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M       intern/cycles/kernel/split/kernel_lamp_emission.h
M       intern/cycles/kernel/split/kernel_next_iteration_setup.h
M       intern/cycles/kernel/split/kernel_queue_enqueue.h
M       intern/cycles/kernel/split/kernel_scene_intersect.h
M       intern/cycles/kernel/split/kernel_shader_eval.h
M       intern/cycles/kernel/split/kernel_shadow_blocked.h
M       intern/cycles/kernel/svm/svm.h
M       intern/cycles/kernel/svm/svm_closure.h
M       intern/cycles/kernel/svm/svm_fresnel.h
M       intern/cycles/kernel/svm/svm_light_path.h

===================================================================

diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index b7abc1e..76563ce 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType 
type, float3 weight)
+ccl_device ShaderClosure *closure_alloc(KernelGlobals *kg, ShaderData *sd, int 
size, ClosureType type, float3 weight)
 {
        kernel_assert(size <= sizeof(ShaderClosure));
 
@@ -35,7 +35,7 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int 
size, ClosureType ty
        return sc;
 }
 
-ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
+ccl_device ccl_addr_space void *closure_alloc_extra(KernelGlobals *kg, 
ShaderData *sd, int size)
 {
        /* Allocate extra space for closure that need more parameters. We 
allocate
         * in chunks of sizeof(ShaderClosure) starting from the end of the 
closure
@@ -58,9 +58,9 @@ ccl_device ccl_addr_space void 
*closure_alloc_extra(ShaderData *sd, int size)
        return (ccl_addr_space void*)(ccl_fetch(sd, closure) + MAX_CLOSURE - 
num_closure_extra);
 }
 
-ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 
weight)
+ccl_device_inline ShaderClosure *bsdf_alloc(KernelGlobals *kg, ShaderData *sd, 
int size, float3 weight)
 {
-       ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+       ShaderClosure *sc = closure_alloc(kg, sd, size, CLOSURE_NONE_ID, 
weight);
 
        if(!sc)
                return NULL;
@@ -71,9 +71,9 @@ ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, 
int size, float3 wei
 }
 
 #ifdef __OSL__
-ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, 
float3 weight, void *data)
+ccl_device_inline ShaderClosure *bsdf_alloc_osl(KernelGlobals *kg, ShaderData 
*sd, int size, float3 weight, void *data)
 {
-       ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+       ShaderClosure *sc = closure_alloc(kg, sd, size, CLOSURE_NONE_ID, 
weight);
 
        if(!sc)
                return NULL;
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index af0bbd8..a342025 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -344,9 +344,9 @@ ccl_device void bssrdf_none_sample(const ShaderClosure *sc, 
float xi, float *r,
 
 /* Generic */
 
-ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
+ccl_device_inline Bssrdf *bssrdf_alloc(KernelGlobals *kg, ShaderData *sd, 
float3 weight)
 {
-       Bssrdf *bssrdf = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), 
CLOSURE_NONE_ID, weight);
+       Bssrdf *bssrdf = (Bssrdf*)closure_alloc(kg, sd, sizeof(Bssrdf), 
CLOSURE_NONE_ID, weight);
 
        if(!bssrdf)
                return NULL;
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index e6aa8f8..e347a1e 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -45,12 +45,13 @@
 #define ccl_addr_space
 
 #define ccl_local_id(d) 0
-#define ccl_global_id(d) 0
+#define ccl_global_id(d) (kg->global_id[d])
 
 #define ccl_local_size(d) 1
-#define ccl_global_size(d) 1
+#define ccl_global_size(d) (kg->global_size[d])
 
-#define ccl_num_groups(d) 1
+#define ccl_group_id(d) ccl_global_id(d)
+#define ccl_num_groups(d) ccl_global_size(d)
 
 /* On x86_64, versions of glibc < 2.16 have an issue where expf is
  * much slower than the double version.  This was fixed in glibc 2.16.
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index b60eb14..ea99fdb 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -55,6 +55,7 @@
 #define ccl_local_size(d) get_local_size(d)
 #define ccl_global_size(d) get_global_size(d)
 
+#define ccl_group_id(d) get_group_id(d)
 #define ccl_num_groups(d) get_num_groups(d)
 
 /* Selective nodes compilation. */
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index a2d0057..121b840 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -70,6 +70,9 @@ typedef struct KernelGlobals {
        Intersection *isect_shadow;
        SplitData split_data;
        SplitParams split_param_data;
+
+       int2 global_size;
+       int2 global_id;
 } KernelGlobals;
 
 #endif  /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 8d3176f..212ef98 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -49,6 +49,7 @@ ccl_device void enqueue_ray_index(
  * is no more ray to allocate to other threads.
  */
 ccl_device int get_ray_index(
+        KernelGlobals *kg,
         int thread_index,       /* Global thread index. */
         int queue_number,       /* Queue to operate on. */
         ccl_global int *queues, /* Buffer of all queues. */
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 52c05b8..5bdb3a6 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -140,7 +140,7 @@ ccl_device_inline float3 subsurface_scatter_eval(ShaderData 
*sd,
 }
 
 /* replace closures with a single diffuse bsdf closure after scatter step */
-ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 
weight, bool hit, float3 N)
+ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, 
ShaderData *sd, float3 weight, bool hit, float3 N)
 {
        sd->flag &= ~SD_CLOSURE_FLAGS;
        sd->randb_closure = 0.0f;
@@ -148,7 +148,7 @@ ccl_device void 
subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 wei
        sd->num_closure_extra = 0;
 
        if(hit) {
-               DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, 
sizeof(DiffuseBsdf), weight);
+               DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(kg, sd, 
sizeof(DiffuseBsdf), weight);
 
                if(bsdf) {
                        bsdf->N = N;
@@ -373,7 +373,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
        subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N);
 
        /* Setup diffuse BSDF. */
-       subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
+       subsurface_scatter_setup_diffuse_bsdf(kg, sd, weight, true, N);
 }
 
 /* subsurface scattering step, from a point on the surface to another nearby 
point on the same object */
@@ -463,7 +463,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, 
ShaderData *sd, PathS
        subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N);
 
        /* setup diffuse bsdf */
-       subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 
0), N);
+       subsurface_scatter_setup_diffuse_bsdf(kg, sd, eval, (ss_isect.num_hits 
> 0), N);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index afb9ac7..859994e 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -46,7 +46,8 @@ ccl_device uint get_group_id_with_ray_index(uint ray_index,
        }
 }
 
-ccl_device uint get_total_work(uint tile_dim_x,
+ccl_device uint get_total_work(KernelGlobals *kg,
+                    uint tile_dim_x,
                     uint tile_dim_y,
                     uint grp_idx,
                     uint grp_idy,
@@ -73,7 +74,8 @@ ccl_device uint get_total_work(uint tile_dim_x,
 
 /* Returns 0 in case there is no next work available */
 /* Returns 1 in case work assigned is valid */
-ccl_device int get_next_work(ccl_global uint *work_pool,
+ccl_device int get_next_work(KernelGlobals *kg,
+                  ccl_global uint *work_pool,
                   ccl_private uint *my_work,
                   uint tile_dim_x,
                   uint tile_dim_y,
@@ -91,7 +93,8 @@ ccl_device int get_next_work(ccl_global uint *work_pool,
                                                   tile_dim_y,
                                                   parallel_samples,
                                                   1);
-       uint total_work = get_total_work(tile_dim_x,
+       uint total_work = get_total_work(kg,
+                                        tile_dim_x,
                                         tile_dim_y,
                                         grp_idx,
                                         grp_idy,
@@ -103,7 +106,8 @@ ccl_device int get_next_work(ccl_global uint *work_pool,
 
 /* This function assumes that the passed my_work is valid. */
 /* Decode sample number w.r.t. assigned my_work. */
-ccl_device uint get_my_sample(uint my_work,
+ccl_device uint get_my_sample(KernelGlobals *kg,
+                   uint my_work,
                    uint tile_dim_x,
                    uint tile_dim_y,
                    uint parallel_samples,
@@ -138,7 +142,8 @@ ccl_device uint get_my_sample(uint my_work,
 }
 
 /* Decode pixel and tile position w.r.t. assigned my_work. */
-ccl_device void get_pixel_tile_position(ccl_private uint *pixel_x,
+ccl_device void get_pixel_tile_position(KernelGlobals *kg,
+                             ccl_private uint *pixel_x,
                              ccl_private uint *pixel_y,
                              ccl_private uint *tile_x,
                              ccl_private uint *tile_y,
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index 24c821d..eb40473 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -83,7 +83,7 @@ ccl_device void kernel_background_buffer_update(KernelGlobals 
*kg)
                split_params->queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] 
= 0;
        }
        char enqueue_flag = 0;
-       ray_index = get_ray_index(ray_index,
+       ray_index = get_ray_index(kg, ray_index,
                                  QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
                                  split_state->queue_data,
                                  split_params->queue_size,
@@ -142,8 +142,8 @@ ccl_device void 
kernel_background_buffer_update(KernelGlobals *kg)
 
 #ifdef __WORK_STEALING__
        my_work = split_state->work_array[ray_index];
-       sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + 
split_params->start_sample;
-       g

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [365ad27] cycles_split_kernel: Cycles: Actually implement work item functions for CPU

Reply via email to