Commit: 520b53364c73c75c4ff400d639dad13630f0e6fc
Author: Mai Lavelle
Date:   Thu Jan 26 01:41:48 2017 -0500
Branches: master
https://developer.blender.org/rB520b53364c73c75c4ff400d639dad13630f0e6fc

Cycles: Add OpenCL kernel for zeroing memory buffers

Transferring memory to the device was very slow and there's really no
need when only zeroing a buffer.

===================================================================

M       intern/cycles/device/opencl/opencl_base.cpp
M       intern/cycles/kernel/kernels/opencl/kernel.cl

===================================================================

diff --git a/intern/cycles/device/opencl/opencl_base.cpp 
b/intern/cycles/device/opencl/opencl_base.cpp
index c3f099b795..7fa14eee70 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -206,6 +206,7 @@ bool OpenCLDeviceBase::load_kernels(const 
DeviceRequestedFeatures& requested_fea
        base_program.add_kernel(ustring("convert_to_half_float"));
        base_program.add_kernel(ustring("shader"));
        base_program.add_kernel(ustring("bake"));
+       base_program.add_kernel(ustring("zero_buffer"));
 
        vector<OpenCLProgram*> programs;
        programs.push_back(&base_program);
@@ -311,10 +312,61 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, 
int y, int w, int h, in
 void OpenCLDeviceBase::mem_zero(device_memory& mem)
 {
        if(mem.device_pointer) {
+               if(base_program.is_loaded()) {
+                       cl_kernel ckZeroBuffer = 
base_program(ustring("zero_buffer"));
+
+                       size_t global_size[] = {1024, 1024};
+                       size_t num_threads = global_size[0] * global_size[1];
+
+                       cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
+                       unsigned long long d_offset = 0;
+                       unsigned long long d_size = 0;
+
+                       while(d_offset < mem.memory_size()) {
+                               d_size = std::min<unsigned long 
long>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
+
+                               kernel_set_args(ckZeroBuffer, 0, d_buffer, 
d_size, d_offset);
+
+                               ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
+                                                                      
ckZeroBuffer,
+                                                                      2,
+                                                                      NULL,
+                                                                      
global_size,
+                                                                      NULL,
+                                                                      0,
+                                                                      NULL,
+                                                                      NULL);
+                               opencl_assert_err(ciErr, 
"clEnqueueNDRangeKernel");
+
+                               d_offset += d_size;
+                       }
+               }
+
                if(mem.data_pointer) {
                        memset((void*)mem.data_pointer, 0, mem.memory_size());
                }
-               mem_copy_to(mem);
+
+               if(!base_program.is_loaded()) {
+                       void* zero = (void*)mem.data_pointer;
+
+                       if(!mem.data_pointer) {
+                               zero = util_aligned_malloc(mem.memory_size(), 
16);
+                               memset(zero, 0, mem.memory_size());
+                       }
+
+                       opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
+                                   CL_MEM_PTR(mem.device_pointer),
+                                   CL_TRUE,
+                                   0,
+                                   mem.memory_size(),
+                                   zero,
+                                   0,
+                                   NULL, NULL));
+
+                       if(!mem.data_pointer) {
+                               util_aligned_free(zero);
+                       }
+               }
        }
 }
 
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl 
b/intern/cycles/kernel/kernels/opencl/kernel.cl
index a68f97857b..e501fd4f01 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -193,4 +193,20 @@ __kernel void kernel_ocl_convert_to_half_float(
                kernel_film_convert_to_half_float(kg, rgba, buffer, 
sample_scale, x, y, offset, stride);
 }
 
+__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, 
ulong offset)
+{
+       size_t i = get_global_id(0) + get_global_id(1) * get_global_size(0);
+
+       if(i < size / sizeof(float4)) {
+               buffer[i+offset/sizeof(float4)] = make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
+       }
+       else if(i == size / sizeof(float4)) {
+               ccl_global uchar *b = (ccl_global 
uchar*)&buffer[i+offset/sizeof(float4)];
+
+               for(i = 0; i < size % sizeof(float4); i++) {
+                       *(b++) = 0;
+               }
+       }
+}
+
 #endif  /* __COMPILE_ONLY_MEGAKERNEL__ */

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to