Commit: d75d262318c845058a6410fb4c764dadbaa20b6b
Author: Mai Lavelle
Date:   Mon Nov 27 02:21:06 2017 -0500
Branches: split-kernel-faster-building
https://developer.blender.org/rBd75d262318c845058a6410fb4c764dadbaa20b6b

Cycles: Split shader_eval out of the kernel_lamp_emission kernel

With this change, kernels for the BMW and classroom scenes build in half the
time they take on master. Render times are 1% faster as well.

===================================================================

M       intern/cycles/device/device_split_kernel.cpp
M       intern/cycles/kernel/kernel_types.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M       intern/cycles/kernel/kernels/cuda/kernel_split.cu
M       intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
M       intern/cycles/kernel/split/kernel_do_volume.h
M       intern/cycles/kernel/split/kernel_lamp_emission.h
M       intern/cycles/kernel/split/kernel_shader_eval.h

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp 
b/intern/cycles/device/device_split_kernel.cpp
index 9697411a23e..e8ea556bd4f 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -239,6 +239,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
                        for(int PathIter = 0; PathIter < 16; PathIter++) {
                                ENQUEUE_SPLIT_KERNEL(scene_intersect, 
global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(lamp_emission, 
global_size, local_size);
+                               ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, 
local_size);
                                ENQUEUE_SPLIT_KERNEL(do_volume, global_size, 
local_size);
                                ENQUEUE_SPLIT_KERNEL(queue_enqueue, 
global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(indirect_background, 
global_size, local_size);
diff --git a/intern/cycles/kernel/kernel_types.h 
b/intern/cycles/kernel/kernel_types.h
index ed756096ebf..e1fdd1340db 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1462,6 +1462,8 @@ enum RayState {
        RAY_VOLUME_INDIRECT_NEXT_ITER,
        RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
 
+       RAY_STATE_ANY, /* Special, never assigned to a ray */
+
        /* Ray flags */
 
        /* Flags to denote that the ray is currently evaluating the branched 
indirect loop */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 6bcefe39ae5..98aaf6b7770 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -211,7 +211,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
 
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu 
b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index d64c8c66458..3f3915be981 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -104,7 +104,7 @@ kernel_cuda_path_trace_data_init(
 
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl 
b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
index c314dc96c33..0792fdc3171 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -19,6 +19,8 @@
 #include "kernel/split/kernel_lamp_emission.h"
 
 #define KERNEL_NAME lamp_emission
+#define LOCALS_TYPE uint
 #include "kernel/kernels/opencl/kernel_split_function.h"
 #undef KERNEL_NAME
+#undef LOCALS_TYPE
 
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h 
b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..7296bb60846 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -111,6 +111,28 @@ ccl_device_noinline bool 
kernel_split_branched_path_volume_indirect_light_iter(K
 
 ccl_device void kernel_do_volume(KernelGlobals *kg)
 {
+       /* Finish up kernel_path_lamp_emission from kernel_lamp_emission 
kernel. */
+       int ray_index = ccl_global_id(1) * ccl_global_size(0) + 
ccl_global_id(0);
+
+       ray_index = get_ray_index(kg, ray_index,
+                                 QUEUE_SHADER_EVAL,
+                                 kernel_split_state.queue_data,
+                                 kernel_split_params.queue_size,
+                                 1);
+
+       if(ray_index != QUEUE_EMPTY_SLOT) {
+               PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+               ccl_global PathState *state = 
&kernel_split_state.path_state[ray_index];
+
+               float3 throughput = kernel_split_state.throughput[ray_index];
+               Ray ray = kernel_split_state.ray[ray_index];
+               ccl_global Intersection *isect = 
&kernel_split_state.isect[ray_index];
+               ShaderData *sd = kernel_split_sd(sd, ray_index);
+               LightSample ls = kernel_split_state.light_sample[ray_index];
+
+               kernel_path_lamp_emission_finish(kg, state, &ray, throughput, 
isect, sd, L, &ls);
+       }
+
 #ifdef __VOLUME__
        /* We will empty this queue in this kernel. */
        if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
@@ -120,7 +142,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
 #  endif  /* __BRANCHED_PATH__ */
        }
 
-       int ray_index = ccl_global_id(1) * ccl_global_size(0) + 
ccl_global_id(0);
+       ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 
        if(*kernel_split_params.use_queues_flag) {
                ray_index = get_ray_index(kg, ray_index,
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h 
b/intern/cycles/kernel/split/kernel_lamp_emission.h
index c14f66f664f..a94ef87aaed 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -20,14 +20,22 @@ CCL_NAMESPACE_BEGIN
  * It processes rays of state RAY_ACTIVE and RAY_HIT_BACKGROUND.
  * We will empty QUEUE_ACTIVE_AND_REGENERATED_RAYS queue in this kernel.
  */
-ccl_device void kernel_lamp_emission(KernelGlobals *kg)
+ccl_device void kernel_lamp_emission(KernelGlobals *kg, ccl_local_param uint 
*local_queue_atomics)
 {
+       if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+               *local_queue_atomics = 0;
+       }
+       ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+       if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+               kernel_split_params.shader_eval_queue = QUEUE_SHADER_EVAL;
+               kernel_split_params.shader_eval_state = RAY_STATE_ANY;
 #ifndef __VOLUME__
        /* We will empty this queue in this kernel. */
-       if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
                
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
-       }
 #endif
+       }
+
        /* Fetch use_queues_flag. */
        char local_use_queues_flag = *kernel_split_params.use_queues_flag;
        ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -49,19 +57,33 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
                }
        }
 
+       ShaderEvalTask *eval_task = 
&kernel_split_state.shader_eval_task[ray_index];
+       ShaderEvalIntent intent = SHADER_EVAL_INTENT_SKIP;
+
        if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
           IS_STATE(kernel_split_state.ray_state, ray_index, 
RAY_HIT_BACKGROUND))
        {
-               PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
                ccl_global PathState *state = 
&kernel_split_state.path_state[ray_index];
 
-               float3 throughput = kernel_split_state.throughput[ray_index];
                Ray ray = kernel_split_state.ray[ray_index];
                ccl_global Intersection *isect = 
&kernel_split_state.isect[ray_index];
                ShaderData *sd = kernel_split_sd(sd, ray_index);
+               LightSample ls = kernel_split_state.light_sample[ray_index];
 
-               kernel_path_lamp_emission(kg, state, &ray, throughput, isect, 
sd, L);
+               intent = kernel_path_lamp_emission_setup(kg, state, &ray, 
isect, sd, &ls);
+               if(intent) {
+                       shader_eval_task_setup(kg, eval_task, sd, intent);
+                       kernel_split_state.light_sample[ray_index] = ls;
+               }
        }
+
+       enqueue_ray_index_local(ray_index,
+                               QUEUE_SHADER_EVAL,
+                               intent != SHADER_EVAL_INTENT_SKIP,
+                               kernel_split_params.queue_size,
+                               local_queue_atomics,
+                               kernel_split_state.queue_data,
+                               kernel_split_params.queue_index);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h 
b/intern/cycles/kernel/split/kernel_shader_eval.h
index c53807f4e09..b75608f61f7 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -40,7 +40,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
                return;
        }
 
-       if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state)) {
+       if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state) || 
shade_state == RAY_STATE_ANY) {
                ShaderEvalTask *eval_task = 
&kernel_split_state.shader_eval_task[ray_index];
                ShaderData *sd = (ShaderData*)(kernel_split_state.data + 
eval_task->sd_offset);
                ccl_global PathState *state = 
&kernel_split_state.path_state[ray_index];

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to