Commit: 86f3d223fd4356af549057a70cd90674c6b69a75 Author: Mai Lavelle Date: Mon Oct 17 13:47:19 2016 +0200 Branches: cycles_split_kernel https://developer.blender.org/rB86f3d223fd4356af549057a70cd90674c6b69a75
Cycles: Refactor so all split kernels have same signature This is to set things up for supporting the split kernel on cpu and other devices. By having the same signature for each kernel we can deduplicate a lot of code without needing any trickery. The only kernel that doesn't share this signature is the `data_init` kernel, which might end up being different for each device type. =================================================================== M intern/cycles/device/opencl/opencl_split.cpp M intern/cycles/kernel/kernel_globals.h M intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl M intern/cycles/kernel/kernels/opencl/kernel_data_init.cl M intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl M intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl M intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl M intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl M intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl M intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl M intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl M intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl M intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl M intern/cycles/kernel/split/kernel_data_init.h M intern/cycles/kernel/split/kernel_split_data.h =================================================================== diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index ce0c702..fc80173 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -223,6 +223,7 @@ public: void *sd_input; void *isect_shadow; SplitData split_data; + SplitParams split_param_data; } KernelGlobals; return sizeof(KernelGlobals); @@ -422,6 +423,7 @@ public: kernel_set_args(program_data_init(), start_arg_index, start_sample, + end_sample, d_x, d_y, d_w, @@ -438,128 +440,24 @@ public: work_pool_wgs, num_samples, #endif - num_parallel_samples); - - //printf("kernel_set_args scene_intersect\n"); - kernel_set_args(program_scene_intersect(), - 0, - kgbuffer, - d_data, - d_w, - d_h, - Queue_index, - dQueue_size, - use_queues_flag, - num_parallel_samples); - - //printf("kernel_set_args lamp_emission\n"); - kernel_set_args(program_lamp_emission(), - 0, - kgbuffer, - d_data, - d_w, - d_h, - Queue_index, - dQueue_size, - use_queues_flag, - num_parallel_samples); - - //printf("kernel_set_args queue_enqueue\n"); - kernel_set_args(program_queue_enqueue(), - 0, - kgbuffer, - d_data, - Queue_index, - dQueue_size); - - //printf("kernel_set_args background_buffer_update\n"); - kernel_set_args(program_background_buffer_update(), - 0, - kgbuffer, - d_data, - d_rng_state, - d_w, - d_h, - d_x, - d_y, - d_stride, - rtile.rng_state_offset_x, - rtile.rng_state_offset_y, - rtile.buffer_rng_state_stride, - Queue_index, - dQueue_size, - end_sample, - start_sample, -#ifdef __WORK_STEALING__ - work_pool_wgs, - num_samples, -#endif - num_parallel_samples); - - //printf("kernel_set_args shader_eval\n"); - kernel_set_args(program_shader_eval(), - 0, - kgbuffer, - d_data, - Queue_index, - dQueue_size); - - //printf("kernel_set_args holdout_emission_blurring_pathtermination_ao\n"); - kernel_set_args(program_holdout_emission_blurring_pathtermination_ao(), - 0, - kgbuffer, - d_data, - d_w, - d_h, - d_x, - d_y, - d_stride, - Queue_index, - dQueue_size, -#ifdef __WORK_STEALING__ - start_sample, -#endif - num_parallel_samples); - - //printf("kernel_set_args direct_lighting\n"); - kernel_set_args(program_direct_lighting(), - 0, - kgbuffer, - d_data, - Queue_index, - dQueue_size); - - //printf("kernel_set_args shadow_blocked\n"); - kernel_set_args(program_shadow_blocked(), - 0, - kgbuffer, - d_data, - Queue_index, - dQueue_size); - - //printf("kernel_set_args next_iteration_setup\n"); - kernel_set_args(program_next_iteration_setup(), - 0, - kgbuffer, - d_data, - Queue_index, - dQueue_size, - use_queues_flag); - - //printf("kernel_set_args sum_all_radiance\n"); - kernel_set_args(program_sum_all_radiance(), - 0, - kgbuffer, - d_data, - d_buffer, - num_parallel_samples, - d_w, - d_h, - d_stride, - rtile.buffer_offset_x, - rtile.buffer_offset_y, - rtile.buffer_rng_state_stride, - start_sample); + num_parallel_samples, + rtile.buffer_offset_x, + rtile.buffer_offset_y, + rtile.buffer_rng_state_stride, + d_buffer); + +#define KERNEL_SET_ARGS(name) kernel_set_args(program_##name(), 0, kgbuffer, d_data); + KERNEL_SET_ARGS(scene_intersect); + KERNEL_SET_ARGS(lamp_emission); + KERNEL_SET_ARGS(queue_enqueue); + KERNEL_SET_ARGS(background_buffer_update); + KERNEL_SET_ARGS(shader_eval); + KERNEL_SET_ARGS(holdout_emission_blurring_pathtermination_ao); + KERNEL_SET_ARGS(direct_lighting); + KERNEL_SET_ARGS(shadow_blocked); + KERNEL_SET_ARGS(next_iteration_setup); + KERNEL_SET_ARGS(sum_all_radiance); +#undef KERNEL_SET_ARGS /* Macro for Enqueuing split kernels. */ #define GLUE(a, b) a ## b diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 87ee551..e40ed05 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -103,6 +103,7 @@ typedef ccl_addr_space struct KernelGlobals { ShaderData *sd_input; Intersection *isect_shadow; SplitData split_data; + SplitParams split_param_data; # endif } KernelGlobals; diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl index 5fcfd82..bb8217e 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl @@ -18,21 +18,7 @@ __kernel void kernel_ocl_path_trace_background_buffer_update( KernelGlobals *kg, - ccl_constant KernelData *data, - ccl_global uint *rng_state, - int sw, int sh, int sx, int sy, int stride, - int rng_state_offset_x, - int rng_state_offset_y, - int rng_state_stride, - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize, /* Size (capacity) of each queue */ - int end_sample, - int start_sample, -#ifdef __WORK_STEALING__ - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + ccl_constant KernelData *data) { ccl_local unsigned int local_queue_atomics; if(get_local_id(0) == 0 && get_local_id(1) == 0) { @@ -43,13 +29,13 @@ __kernel void kernel_ocl_path_trace_background_buffer_update( int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); if(ray_index == 0) { /* We will empty this queue in this kernel. */ - Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + split_params->queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; } char enqueue_flag = 0; ray_index = get_ray_index(ray_index, QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, split_state->queue_data, - queuesize, + split_params->queue_size, 1); #ifdef __COMPUTE_DEVICE_GPU__ @@ -70,18 +56,22 @@ __kernel void kernel_ocl_path_trace_background_buffer_update( #endif enqueue_flag = kernel_background_buffer_update(kg, - rng_state, - sw, sh, sx, sy, stride, - rng_state_offset_x, - rng_state_offset_y, - rng_state_stride, - end_sample, - start_sample, + split_params->rng_state, + split_params->w, + split_params->h, + split_params->x, + split_params->y, + split_params->stride, + split_params->rng_offset_x, + split_params->rng_offset_y, + split_params->rng_stride, + split_params->end_sample, + split_params->start_sample, #ifdef __WORK_STEALING__ - work_pool_wgs, - num_samples, + split_params->work_pool_wgs, + split_params->num_samples, #endif - parallel_samples, + split_params->parallel_samples, ray_index); #ifndef __COMPUTE_DEVICE_GPU__ } @@ -93,8 +83,8 @@ __kernel void kernel_ocl_path_trace_background_buffer_update( enqueue_ray_index_local(ray_index, QUEUE_ACTIVE_AND_REGENERATED_RAYS, enqueue_flag, - queuesize, + split_params->queue_size, &local_queue_atomics, split_state->queue_data, - Queue_index); + split_params->queue_index); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl index f4f0f0d..205e65a 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -28,7 +28,9 @@ __kernel void kernel_ocl_path_tr @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs