Commit: d75d262318c845058a6410fb4c764dadbaa20b6b Author: Mai Lavelle Date: Mon Nov 27 02:21:06 2017 -0500 Branches: split-kernel-faster-building https://developer.blender.org/rBd75d262318c845058a6410fb4c764dadbaa20b6b
Cycles: Split shader_eval out of the kernel_lamp_emission kernel. With this change, kernels for the BMW and classroom scenes build in half the time compared to master. Render times are 1% faster as well. =================================================================== M intern/cycles/device/device_split_kernel.cpp M intern/cycles/kernel/kernel_types.h M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h M intern/cycles/kernel/kernels/cuda/kernel_split.cu M intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl M intern/cycles/kernel/split/kernel_do_volume.h M intern/cycles/kernel/split/kernel_lamp_emission.h M intern/cycles/kernel/split/kernel_shader_eval.h =================================================================== diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 9697411a23e..e8ea556bd4f 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -239,6 +239,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, for(int PathIter = 0; PathIter < 16; PathIter++) { ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size); ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size); ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size); ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index ed756096ebf..e1fdd1340db 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -1462,6 +1462,8 @@ enum RayState { RAY_VOLUME_INDIRECT_NEXT_ITER, RAY_SUBSURFACE_INDIRECT_NEXT_ITER, + RAY_STATE_ANY, /* Special, never assigned to a ray */ + /* Ray flags */ /* Flags to denote that the ray is currently evaluating the branched indirect loop */ diff --git 
a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 6bcefe39ae5..98aaf6b7770 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -211,7 +211,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) -DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint) diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index d64c8c66458..3f3915be981 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -104,7 +104,7 @@ kernel_cuda_path_trace_data_init( DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) -DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index c314dc96c33..0792fdc3171 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -19,6 +19,8 @@ #include "kernel/split/kernel_lamp_emission.h" #define KERNEL_NAME lamp_emission +#define LOCALS_TYPE uint #include "kernel/kernels/opencl/kernel_split_function.h" #undef KERNEL_NAME +#undef LOCALS_TYPE diff --git a/intern/cycles/kernel/split/kernel_do_volume.h 
b/intern/cycles/kernel/split/kernel_do_volume.h index fb5bd3d48dd..7296bb60846 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -111,6 +111,28 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K ccl_device void kernel_do_volume(KernelGlobals *kg) { + /* Finish up kernel_path_lamp_emission from kernel_lamp_emission kernel. */ + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + + ray_index = get_ray_index(kg, ray_index, + QUEUE_SHADER_EVAL, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if(ray_index != QUEUE_EMPTY_SLOT) { + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + + float3 throughput = kernel_split_state.throughput[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + LightSample ls = kernel_split_state.light_sample[ray_index]; + + kernel_path_lamp_emission_finish(kg, state, &ray, throughput, isect, sd, L, &ls); + } + #ifdef __VOLUME__ /* We will empty this queue in this kernel. 
*/ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { @@ -120,7 +142,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) # endif /* __BRANCHED_PATH__ */ } - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); if(*kernel_split_params.use_queues_flag) { ray_index = get_ray_index(kg, ray_index, diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index c14f66f664f..a94ef87aaed 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -20,14 +20,22 @@ CCL_NAMESPACE_BEGIN * It processes rays of state RAY_ACTIVE and RAY_HIT_BACKGROUND. * We will empty QUEUE_ACTIVE_AND_REGENERATED_RAYS queue in this kernel. */ -ccl_device void kernel_lamp_emission(KernelGlobals *kg) +ccl_device void kernel_lamp_emission(KernelGlobals *kg, ccl_local_param uint *local_queue_atomics) { + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.shader_eval_queue = QUEUE_SHADER_EVAL; + kernel_split_params.shader_eval_state = RAY_STATE_ANY; #ifndef __VOLUME__ /* We will empty this queue in this kernel. */ - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - } #endif + } + /* Fetch use_queues_flag. 
*/ char local_use_queues_flag = *kernel_split_params.use_queues_flag; ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -49,19 +57,33 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg) } } + ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index]; + ShaderEvalIntent intent = SHADER_EVAL_INTENT_SKIP; + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; ShaderData *sd = kernel_split_sd(sd, ray_index); + LightSample ls = kernel_split_state.light_sample[ray_index]; - kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L); + intent = kernel_path_lamp_emission_setup(kg, state, &ray, isect, sd, &ls); + if(intent) { + shader_eval_task_setup(kg, eval_task, sd, intent); + kernel_split_state.light_sample[ray_index] = ls; + } } + + enqueue_ray_index_local(ray_index, + QUEUE_SHADER_EVAL, + intent != SHADER_EVAL_INTENT_SKIP, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index c53807f4e09..b75608f61f7 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -40,7 +40,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) return; } - if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state)) { + if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state) || shade_state == RAY_STATE_ANY) { ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index]; 
ShaderData *sd = (ShaderData*)(kernel_split_state.data + eval_task->sd_offset); ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs