Commit: ef863d644ff3fcafb614d351723bc2d997dbf068
Author: varunsundar08
Date:   Mon Apr 27 19:10:49 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rBef863d644ff3fcafb614d351723bc2d997dbf068

Add OpenCLDeviceSplitKernel class

===================================================================

M       intern/cycles/device/device_opencl.cpp

===================================================================

diff --git a/intern/cycles/device/device_opencl.cpp 
b/intern/cycles/device/device_opencl.cpp
index cc184c6..605644d 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1678,6 +1678,7 @@ public:
 
                current_clos_max = clos_max;
 
+               /* TODO : Add macros to kernel_ocl_path_trace in kernel.cl to 
avoid megakernel build */
                kernel_init_source = "#include \"kernel.cl\" // " + kernel_md5 
+ "\n";
                device_md5 = device_md5_hash("");
                clbin = string_printf("cycles_kernel_%s_%s.clbin", 
device_md5.c_str(), kernel_md5.c_str());
@@ -3518,6 +3519,2946 @@ The current tile of dimensions %dx%d is split into 
tiles of dimension %dx%d for
        }
 };
 
+/* OpenCLDeviceSplitKernel's declaration/definition */
+class OpenCLDeviceSplitKernel : public Device
+{
+public:
+       DedicatedTaskPool task_pool;
+       cl_context cxContext;
+       cl_command_queue cqCommandQueue;
+       cl_platform_id cpPlatform;
+       cl_device_id cdDevice;
+       cl_int ciErr;
+
+       /* Kernel declaration */
+       cl_kernel ckPathTraceKernel_DataInit_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_LampEmission_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_QueueEnqueue_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL;
+       cl_kernel 
ckPathTraceKernel_Holdout_Emission_Blurring_Pathtermination_AO_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_Subsurface_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_DirectLighting_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_ShadowBlocked_DirectLighting_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_SetUpNextIteration_SPLIT_KERNEL;
+       cl_kernel ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL;
+       cl_kernel ckShaderKernel;
+       cl_kernel ckBakeKernel;
+       cl_kernel ckFilmConvertByteKernel;
+       cl_kernel ckFilmConvertHalfFloatKernel;
+
+       /* cl_program declaration */
+       cl_program dataInit_program;
+       cl_program sceneIntersect_program;
+       cl_program lampEmission_program;
+       cl_program QueueEnqueue_program;
+       cl_program background_BufferUpdate_program;
+       cl_program shaderEval_program;
+       cl_program holdout_emission_blurring_termination_ao_program;
+       cl_program subsurface_program;
+       cl_program directLighting_program;
+       cl_program shadowBlocked_program;
+       cl_program nextIterationSetUp_program;
+       cl_program sumAllRadiance_program;
+       cl_program cpProgram;
+
+       /* Global memory variables [porting]; This memory is used for
+       * co-operation between different kernels; Data written by one
+       * kernel will be available to another kernel via this global
+       * memory
+       */
+       cl_mem rng_coop;
+       cl_mem throughput_coop;
+       cl_mem L_transparent_coop;
+       cl_mem PathRadiance_coop;
+       cl_mem Ray_coop;
+       cl_mem PathState_coop;
+       cl_mem Intersection_coop;
+       cl_mem kgbuffer; /* KernelGlobals buffer */
+
+       /* global buffers for ShaderData */
+       cl_mem sd;                      /* ShaderData used in the main 
path-iteration loop */
+       cl_mem sd_DL_shadow;            /* ShaderData used in Direct Lighting 
and ShadowBlocked kernel */
+
+       /* global buffers of each member of ShaderData */
+       cl_mem P_sd;
+       cl_mem P_sd_DL_shadow;
+       cl_mem N_sd;
+       cl_mem N_sd_DL_shadow;
+       cl_mem Ng_sd;
+       cl_mem Ng_sd_DL_shadow;
+       cl_mem I_sd;
+       cl_mem I_sd_DL_shadow;
+       cl_mem shader_sd;
+       cl_mem shader_sd_DL_shadow;
+       cl_mem flag_sd;
+       cl_mem flag_sd_DL_shadow;
+       cl_mem prim_sd;
+       cl_mem prim_sd_DL_shadow;
+       cl_mem type_sd;
+       cl_mem type_sd_DL_shadow;
+       cl_mem u_sd;
+       cl_mem u_sd_DL_shadow;
+       cl_mem v_sd;
+       cl_mem v_sd_DL_shadow;
+       cl_mem object_sd;
+       cl_mem object_sd_DL_shadow;
+       cl_mem time_sd;
+       cl_mem time_sd_DL_shadow;
+       cl_mem ray_length_sd;
+       cl_mem ray_length_sd_DL_shadow;
+       cl_mem ray_depth_sd;
+       cl_mem ray_depth_sd_DL_shadow;
+       cl_mem transparent_depth_sd;
+       cl_mem transparent_depth_sd_DL_shadow;
+#ifdef __RAY_DIFFERENTIALS__
+       cl_mem dP_sd, dI_sd;
+       cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow;
+       cl_mem du_sd, dv_sd;
+       cl_mem du_sd_DL_shadow, dv_sd_DL_shadow;
+#endif
+#ifdef __DPDU__
+       cl_mem dPdu_sd, dPdv_sd;
+       cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow;
+#endif
+       cl_mem closure_sd;
+       cl_mem closure_sd_DL_shadow;
+       cl_mem num_closure_sd;
+       cl_mem num_closure_sd_DL_shadow;
+       cl_mem randb_closure_sd;
+       cl_mem randb_closure_sd_DL_shadow;
+       cl_mem ray_P_sd;
+       cl_mem ray_P_sd_DL_shadow;
+       cl_mem ray_dP_sd;
+       cl_mem ray_dP_sd_DL_shadow;
+
+       /* Global memory required for shadow blocked and accum_radiance */
+       cl_mem BSDFEval_coop;
+       cl_mem ISLamp_coop;
+       cl_mem LightRay_coop;
+       cl_mem AOAlpha_coop;
+       cl_mem AOBSDF_coop;
+       cl_mem AOLightRay_coop;
+       cl_mem Intersection_coop_AO;
+       cl_mem Intersection_coop_DL;
+
+       /* Global state array that tracks ray state */
+       cl_mem ray_state;
+
+       /* per sample buffers */
+       cl_mem per_sample_output_buffers;
+
+       /* Denotes which sample each ray is being processed for */
+       cl_mem work_array;
+
+       /* Queue*/
+       cl_mem Queue_data;  /* Array of size queuesize * num_queues * 
sizeof(int) */
+       cl_mem Queue_index; /* Array of size num_queues * sizeof(int); Tracks 
the size of each queue */
+
+       /* Flag to make sceneintersect and lampemission kernel use queues */
+       cl_mem use_queues_flag;
+
+       /* Required-memory size */
+       size_t rng_size;
+       size_t throughput_size;
+       size_t L_transparent_size;
+       size_t rayState_size;
+       size_t hostRayState_size;
+       size_t work_element_size;
+       size_t ISLamp_size;
+
+       /* size of structures declared in kernel_types.h */
+       size_t PathRadiance_size;
+       size_t Ray_size;
+       size_t PathState_size;
+       size_t Intersection_size;
+
+       /* Sizes of memory required for shadow blocked function */
+       size_t AOAlpha_size;
+       size_t AOBSDF_size;
+       size_t AOLightRay_size;
+       size_t LightRay_size;
+       size_t BSDFEval_size;
+       size_t Intersection_coop_AO_size;
+       size_t Intersection_coop_DL_size;
+
+       /* Amount of memory in output buffer associated with one pixel/thread */
+       size_t per_thread_output_buffer_size;
+
+       /* Total allocatable available device memory */
+       size_t total_allocatable_memory;
+
+       /* host version of ray_state; Used in checking host path-iteration 
termination */
+       char *hostRayStateArray;
+
+       /* Number of path-iterations to be done in one shot */
+       unsigned int PathIteration_times;
+
+       /* Denotes if the render is background or foreground */
+       bool background;
+
+#ifdef __WORK_STEALING__
+       /* Work pool with respect to each work group */
+       cl_mem work_pool_wgs;
+
+       /* Denotes the maximum work groups possible w.r.t. current tile size */
+       unsigned int max_work_groups;
+#endif
+
+       /* clos_max value for which the kernels have been loaded currently */
+       int current_clos_max;
+
+       /* Marked True in constructor and marked false at the end of 
path_trace() */
+       bool first_tile;
+
+       typedef map<string, device_vector<uchar>*> ConstMemMap;
+       typedef map<string, device_ptr> MemMap;
+
+       ConstMemMap const_mem_map;
+       MemMap mem_map;
+       device_ptr null_mem;
+
+       bool device_initialized;
+       string platform_name;
+
+       bool opencl_error(cl_int err)
+       {
+               if (err != CL_SUCCESS) {
+                       string message = string_printf("OpenCL error (%d): %s", 
err, clewErrorString(err));
+                       if (error_msg == "")
+                               error_msg = message;
+                       fprintf(stderr, "%s\n", message.c_str());
+                       return true;
+               }
+
+               return false;
+       }
+
+       void opencl_error(const string& message)
+       {
+               if (error_msg == "")
+                       error_msg = message;
+               fprintf(stderr, "%s\n", message.c_str());
+       }
+
+#define opencl_assert(stmt) \
+       { \
+       cl_int err = stmt; \
+       \
+       if (err != CL_SUCCESS) { \
+       string message = string_printf("OpenCL error: %s in %s", 
clewErrorString(err), #stmt); \
+       if (error_msg == "") \
+       error_msg = message; \
+       fprintf(stderr, "%s\n", message.c_str()); \
+       } \
+       } (void)0
+
+       void opencl_assert_err(cl_int err, const char* where)
+       {
+               if (err != CL_SUCCESS) {
+                       string message = string_printf("OpenCL error (%d): %s 
in %s", err, clewErrorString(err), where);
+                       if (error_msg == "")
+                               error_msg = message;
+                       fprintf(stderr, "%s\n", message.c_str());
+#ifndef NDEBUG
+                       abort();
+#endif
+               }
+       }
+
+       OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool 
background_)
+               : Device(info, stats, background_)
+       {
+               cpPlatform = NULL;
+               cdDevice = NULL;
+               cxContext = NULL;
+               cqCommandQueue = NULL;
+               null_mem = 0;
+               device_initialized = false;
+
+               use_split_kernel = true;
+               background = background_;
+
+               /* Initialize kernels */
+               ckPathTraceKernel_DataInit_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_LampEmission_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL = NULL;
+               
ckPathTraceKernel_Holdout_Emission_Blurring_Pathtermination_AO_SPLIT_KERNEL = 
NULL;
+               ckPathTraceKernel_Subsurface_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_DirectLighting_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_ShadowBlocked_DirectLighting_SPLIT_KERNEL = 
NULL;
+               ckPathTraceKernel_SetUpNextIteration_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL = NULL;
+               ckPathTraceKernel_QueueEnqueue_SPLIT_KERNEL = NULL;
+               ckShaderKernel = NULL;
+               ckBakeKernel = NULL;
+               ckFilmConvertByteKernel = NULL;
+               ckFilmConvertHalfFloatKernel = NULL;
+
+               /* Initialize program */
+               dataInit_program = NULL;
+               sceneIntersect_program = NULL;
+               lampEmission_program = NULL;
+               QueueEnqueue_program = NULL;
+               background_BufferUpdate_program = NULL;
+               shaderEval_program = NULL;
+               holdout_emission_blurring_termination_ao_program = NULL;
+               subsurface_program = NULL;
+               directLighting_program = NULL;
+               shadowBlocked_program = NULL;
+               nextIterationSetUp_program = NULL;
+               sumAllRadiance_program = NULL;
+               cpProgram = NULL;
+
+               /* Initialize cl_mem variables */
+               kgbuffer = NULL;
+               sd = NULL;
+               sd_DL_shadow = NULL;
+
+               P_sd = NULL;
+               P_sd_DL_shadow = NULL;
+               N_sd = NULL;
+               N_sd_DL_shadow = NULL;
+               Ng_sd = NULL;
+               Ng_sd_DL_shadow = NULL;
+               I_sd = NULL;
+               I_sd_DL_shadow = NULL;
+               shader_sd = NULL;
+               shader_sd_DL_shadow = NULL;
+               flag_sd = NULL;
+               flag_sd_DL_shadow = NULL;
+               prim_sd = NULL;
+               prim_sd_DL_shadow = NULL;
+               type_sd = NULL;
+               type_sd_DL_shadow = NULL;
+               u_sd = NULL;
+               u_sd_DL_shadow = NULL;
+               v_sd = NULL;
+               v_sd_DL_shadow = NULL;
+               object_sd = NULL;
+               object_sd_DL_shadow = NULL

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to