Commit: 70c659b77e11ac339775fdc4f5dc30947d7f9815
Author: Mai Lavelle
Date:   Tue Oct 18 16:51:02 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB70c659b77e11ac339775fdc4f5dc30947d7f9815

Cycles: Add SplitKernelFunction with OpenCL implementation

SplitKernelFunction can represent a split kernel function for any device its
been implemented for. Currently this is only for OpenCL to simplify the
enqueueing of the split kernels and move another step closer to a split
kernel that can run on any device.

===================================================================

M       intern/cycles/device/device.h
M       intern/cycles/device/opencl/opencl.h
M       intern/cycles/device/opencl/opencl_mega.cpp
M       intern/cycles/device/opencl/opencl_split.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 014e5fc..f79678d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -190,6 +190,28 @@ public:
 std::ostream& operator <<(std::ostream &os,
                           const DeviceRequestedFeatures& requested_features);
 
+/* Types used for split kernel */
+
+class KernelDimensions {
+public:
+       size_t global_size[2];
+       size_t local_size[2];
+
+       KernelDimensions(size_t global_size_[2], size_t local_size_[2])
+       {
+               memcpy(global_size, global_size_, 2*sizeof(size_t));
+               memcpy(local_size, local_size_, 2*sizeof(size_t));
+       }
+};
+
+class SplitKernelFunction {
+public:
+       virtual ~SplitKernelFunction() {}
+
+       /* enqueue the kernel, returns false if there is an error */
+       virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, 
device_memory& data) = 0;
+};
+
 /* Device */
 
 struct DeviceDrawParams {
@@ -273,6 +295,18 @@ public:
                const DeviceRequestedFeatures& /*requested_features*/)
        { return true; }
 
+       /* split kernel */
+       virtual bool enqueue_split_kernel_data_init()
+       {
+               assert(!"not implemented for this device");
+               return false;
+       }
+       virtual SplitKernelFunction* get_split_kernel_function(string 
/*kernel_name*/, const DeviceRequestedFeatures&)
+       {
+               assert(!"not implemented for this device");
+               return NULL;
+       }
+
        /* tasks */
        virtual int get_split_task_count(DeviceTask& task) = 0;
        virtual void task_add(DeviceTask& task) = 0;
diff --git a/intern/cycles/device/opencl/opencl.h 
b/intern/cycles/device/opencl/opencl.h
index 83603b2..dc2a5b2 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -261,7 +261,7 @@ public:
 
        /* Has to be implemented by the real device classes.
         * The base device will then load all these programs. */
-       virtual void load_kernels(const DeviceRequestedFeatures& 
requested_features,
+       virtual bool load_kernels(const DeviceRequestedFeatures& 
requested_features,
                                  vector<OpenCLProgram*> &programs) = 0;
 
        void mem_alloc(device_memory& mem, MemoryType type);
@@ -417,6 +417,8 @@ protected:
 
        virtual string build_options_for_base_program(
                const DeviceRequestedFeatures& /*requested_features*/);
+
+       friend class OpenCLSplitKernelFunction;
 };
 
 Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool 
background);
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp 
b/intern/cycles/device/opencl/opencl_mega.cpp
index 369c086..65feba8 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -39,11 +39,12 @@ public:
        {
        }
 
-       virtual void load_kernels(const DeviceRequestedFeatures& 
/*requested_features*/,
+       virtual bool load_kernels(const DeviceRequestedFeatures& 
/*requested_features*/,
                                  vector<OpenCLProgram*> &programs)
        {
                path_trace_program.add_kernel(ustring("path_trace"));
                programs.push_back(&path_trace_program);
+               return true;
        }
 
        ~OpenCLDeviceMegaKernel()
diff --git a/intern/cycles/device/opencl/opencl_split.cpp 
b/intern/cycles/device/opencl/opencl_split.cpp
index e1e1f54..7d8dd95 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -85,22 +85,57 @@ public:
        int buffer_rng_state_stride;
 };
 
+class OpenCLSplitKernelFunction : public SplitKernelFunction {
+public:
+       OpenCLDeviceBase* device;
+       OpenCLDeviceBase::OpenCLProgram program;
+
+       OpenCLSplitKernelFunction(OpenCLDeviceBase* device) : device(device) {}
+       ~OpenCLSplitKernelFunction() { program.release(); }
+
+       virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, 
device_memory& data)
+       {
+               device->kernel_set_args(program(), 0, kg, data);
+
+               device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
+                                                      program(),
+                                                      2,
+                                                      NULL,
+                                                      dim.global_size,
+                                                      dim.local_size,
+                                                      0,
+                                                      NULL,
+                                                      NULL);
+
+               device->opencl_assert_err(device->ciErr, 
"clEnqueueNDRangeKernel");
+
+               if(device->ciErr != CL_SUCCESS) {
+                       string message = string_printf("OpenCL error: %s in 
clEnqueueNDRangeKernel()",
+                                                      
clewErrorString(device->ciErr));
+                       device->opencl_error(message);
+                       return false;
+               }
+
+               return true;
+       }
+};
+
 /* OpenCLDeviceSplitKernel's declaration/definition. */
 class OpenCLDeviceSplitKernel : public OpenCLDeviceBase
 {
 public:
        /* Kernel declaration. */
        OpenCLProgram program_data_init;
-       OpenCLProgram program_scene_intersect;
-       OpenCLProgram program_lamp_emission;
-       OpenCLProgram program_queue_enqueue;
-       OpenCLProgram program_background_buffer_update;
-       OpenCLProgram program_shader_eval;
-       OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
-       OpenCLProgram program_direct_lighting;
-       OpenCLProgram program_shadow_blocked;
-       OpenCLProgram program_next_iteration_setup;
-       OpenCLProgram program_sum_all_radiance;
+       SplitKernelFunction* program_scene_intersect;
+       SplitKernelFunction* program_lamp_emission;
+       SplitKernelFunction* program_queue_enqueue;
+       SplitKernelFunction* program_background_buffer_update;
+       SplitKernelFunction* program_shader_eval;
+       SplitKernelFunction* 
program_holdout_emission_blurring_pathtermination_ao;
+       SplitKernelFunction* program_direct_lighting;
+       SplitKernelFunction* program_shadow_blocked;
+       SplitKernelFunction* program_next_iteration_setup;
+       SplitKernelFunction* program_sum_all_radiance;
 
        /* Global memory variables [porting]; These memory is used for
         * co-operation between different kernels; Data written by one
@@ -206,8 +241,7 @@ public:
                return sizeof(KernelGlobals);
        }
 
-       virtual void load_kernels(const DeviceRequestedFeatures& 
requested_features,
-                                 vector<OpenCLProgram*> &programs)
+       string get_build_options(const DeviceRequestedFeatures& 
requested_features)
        {
                string build_options = "-D__SPLIT_KERNEL__ ";
 #ifdef __WORK_STEALING__
@@ -227,15 +261,25 @@ public:
                        build_options += " -D__COMPUTE_DEVICE_GPU__";
                }
 
-#define GLUE(a, b) a ## b
+               return build_options;
+       }
+
+       virtual bool load_kernels(const DeviceRequestedFeatures& 
requested_features,
+                                 vector<OpenCLProgram*> &programs)
+       {
+               program_data_init = OpenCLProgram(this,
+                                                 "split_data_init",
+                                                 "kernel_data_init.cl",
+                                                 
get_build_options(requested_features));
+               program_data_init.add_kernel(ustring("path_trace_data_init"));
+               programs.push_back(&program_data_init);
+
 #define LOAD_KERNEL(name) \
-       do { \
-               GLUE(program_, name) = OpenCLProgram(this, "split_" #name, 
"kernel_" #name ".cl", build_options); \
-               GLUE(program_, name).add_kernel(ustring("path_trace_" #name)); \
-               programs.push_back(&GLUE(program_, name)); \
-       } while(false)
+                       program_##name = get_split_kernel_function(#name, 
requested_features); \
+                       if(!program_##name) { \
+                               return false;\
+                       }
 
-               LOAD_KERNEL(data_init);
                LOAD_KERNEL(scene_intersect);
                LOAD_KERNEL(lamp_emission);
                LOAD_KERNEL(queue_enqueue);
@@ -247,10 +291,31 @@ public:
                LOAD_KERNEL(next_iteration_setup);
                LOAD_KERNEL(sum_all_radiance);
 
-#undef FIND_KERNEL
-#undef GLUE
+#undef LOAD_KERNEL
 
                current_max_closure = requested_features.max_closure;
+
+               return true;
+       }
+
+       virtual SplitKernelFunction* get_split_kernel_function(string 
kernel_name,
+                                                              const 
DeviceRequestedFeatures& requested_features)
+       {
+               OpenCLSplitKernelFunction* kernel = new 
OpenCLSplitKernelFunction(this);
+
+               kernel->program = OpenCLProgram(this,
+                                               "split_" + kernel_name,
+                                               "kernel_" + kernel_name + ".cl",
+                                               
get_build_options(requested_features));
+               kernel->program.add_kernel(ustring("path_trace_" + 
kernel_name));
+               kernel->program.load();
+
+               if(!kernel->program.is_loaded()) {
+                       delete kernel;
+                       return NULL;
+               }
+
+               return kernel;
        }
 
        ~OpenCLDeviceSplitKernel()
@@ -259,16 +324,16 @@ public:
 
                /* Release kernels */
                program_data_init.release();
-               program_scene_intersect.release();
-               program_lamp_emission.release();
-               program_queue_enqueue.release();
-               program_background_buffer_update.release();
-               program_shader_eval.release();
-               program_holdout_emission_blurring_pathtermination_ao.release();
-               program_direct_lighting.release();
-               program_shadow_blocked.release();
-               program_next_iteration_setup.release();
-               program_sum_all_radiance.release();
+               delete program_scene_intersect;
+               delete program_lamp_emission;
+               delete program_queue_enqueue;
+               delete program_background_buffer_update;
+               delete program_shader_eval;
+               delete program_holdout_emission_blurring_pathtermination_ao;
+               delete program_direct_lighting;
+               delete program_shadow_blocked;
+               delete program_next_iteration_setup;
+               delete program_sum_all_radiance;
 
                /* Release global memory */
                mem_free(kgbuffer);
@@ -418,51 +483,37 @@ public:
                                        rtile.buffer_rng_state_stride,
                                                        d_buffer);
 
-#define KERNEL_SET_ARGS(name) kernel_set_args(program_##name(), 0, kgbuffer, 
d_data);
-               KERNEL_SET_ARGS(scene_intersect);
-               KERNEL_SET_ARGS(lamp_emission);
-               KERNEL_SET_ARGS(queue_enqueue);
-               KERNEL_SET_ARGS(background_buffer_update);
-               KERNEL_SET_ARGS(shader_eval);
-               KERNEL_SET_ARGS(holdout_emission_blurring_pathtermination_ao);
-               KERNEL_SET_ARGS(direct_lighting);
-               KERNEL_SET_ARGS(shadow_blocked);
-               KERNEL_SET_ARGS(next_iteration_setup);
-               KERNEL_SET_ARGS(sum_all_radiance);
-#undef KERNEL_SET_ARGS
-
-               /* Macro for Enqueuing split kernels. */
-#define GLUE(a, b) a ## b
-#define ENQUEUE_SPLIT_KERNEL(kernelName, globalSize, localSize) \
-               { \
-                       ciErr = clEnqueueNDRangeKernel(cqCommandQueue, \
-                                                      GLUE(program_, \
-                                           

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to