Commit: 20f67b18c856cf159a3aa0ccf6df5c40663fda92
Author: Lukas Stockner
Date:   Wed Nov 16 15:57:06 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB20f67b18c856cf159a3aa0ccf6df5c40663fda92

Cycles: Move collaborative division into the kernel

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/kernel/kernel_filter.h
M       intern/cycles/kernel/kernel_filter_pre.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index be9bb1d..3465a10 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -144,6 +144,7 @@ public:
        KernelFunctions<void(*)(int, int, float**, float**, float**, float**, int*, int, int, float, float)>              filter_non_local_means_3_kernel;
        KernelFunctions<void(*)(KernelGlobals*, float*, int, int, int, int, float, float*, int*)>                         filter_old_1_kernel;
        KernelFunctions<void(*)(KernelGlobals*, float*, float*, int, int, int, int, int, int, float, float*, int*, int*)> filter_old_2_kernel;
+       KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)>                                          filter_divide_combined_kernel;
 
 #define KERNEL_FUNCTIONS(name) \
              KERNEL_NAME_EVAL(cpu, name), \
@@ -167,6 +168,7 @@ public:
          filter_estimate_wlr_params_kernel(KERNEL_FUNCTIONS(filter_estimate_wlr_params)),
          filter_final_pass_wlr_kernel(KERNEL_FUNCTIONS(filter_final_pass_wlr)),
          filter_final_pass_nlm_kernel(KERNEL_FUNCTIONS(filter_final_pass_nlm)),
+         filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
          filter_non_local_means_3_kernel(KERNEL_FUNCTIONS(filter_non_local_means_3)),
          filter_old_1_kernel(KERNEL_FUNCTIONS(filter_old_1)),
          filter_old_2_kernel(KERNEL_FUNCTIONS(filter_old_2))
@@ -441,17 +443,6 @@ public:
                int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
                int pass_stride = w*h;
 
-               if(use_collaborative_filtering) {
-                       for(int y = 0; y < filter_area.w; y++) {
-                               int py = y + filter_area.y;
-                               for(int x = 0; x < filter_area.z; x++) {
-                                       int px = x + filter_area.x;
-                                       float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
-                                       p_buffers[0] = p_buffers[1] = p_buffers[2] = p_buffers[3] = 0.0f;
-                               }
-                       }
-               }
-
                if(old_filter) {
                        for(int y = 0; y < filter_area.w; y++) {
                                for(int x = 0; x < filter_area.z; x++) {
@@ -545,18 +536,11 @@ public:
 
                if(use_collaborative_filtering) {
                        for(int y = 0; y < filter_area.w; y++) {
-                               int py = y + filter_area.y;
                                for(int x = 0; x < filter_area.z; x++) {
-                                       int px = x + filter_area.x;
-                                       float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
-                                       float fac = sample / p_buffers[3];
-                                       p_buffers[0] *= fac;
-                                       p_buffers[1] *= fac;
-                                       p_buffers[2] *= fac;
-                                       p_buffers[3] *= fac;
+                                       filter_divide_combined_kernel()(kg, x + filter_area.x, y + filter_area.y, sample, buffers, offset, stride);
                                }
                        }
-               }
+               }
 
                delete[] storage;
        }
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index ea065f9..4f6290f 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -1500,4 +1500,11 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
 
 #endif // __KERNEL_CUDA__
 
+ccl_device void kernel_filter_divide_combined(KernelGlobals *kg, int x, int y, int sample, float *buffers, int offset, int stride)
+{
+       float4 *combined_buffer = (float4*) (buffers + (offset + y*stride + x)*kernel_data.film.pass_stride);
+       float fac = sample / combined_buffer->w;
+       *combined_buffer = *combined_buffer * fac;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_filter_pre.h b/intern/cycles/kernel/kernel_filter_pre.h
index 3f8b5b5..93d424f 100644
--- a/intern/cycles/kernel/kernel_filter_pre.h
+++ b/intern/cycles/kernel/kernel_filter_pre.h
@@ -30,7 +30,12 @@ ccl_device void kernel_filter_divide_shadow(KernelGlobals *kg, int sample, float
        int xtile = (x < tile_x[1])? 0: ((x < tile_x[2])? 1: 2);
        int ytile = (y < tile_y[1])? 0: ((y < tile_y[2])? 1: 2);
        int tile = ytile*3+xtile;
-       float ccl_readonly_ptr center_buffer = buffers[tile] + (offset[tile] + y*stride[tile] + x)*kernel_data.film.pass_stride + kernel_data.film.pass_denoising;
+       float *center_buffer = buffers[tile] + (offset[tile] + y*stride[tile] + x)*kernel_data.film.pass_stride;
+
+       if(kernel_data.integrator.use_collaborative_filtering) {
+               center_buffer[0] = center_buffer[1] = center_buffer[2] = center_buffer[3] = 0.0f;
+       }
+       center_buffer += kernel_data.film.pass_denoising;
 
        int buffer_w = align_up(rect.z - rect.x, 4);
        int idx = (y-rect.y)*buffer_w + (x - rect.x);
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 478f2fa..5aaa20e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -145,6 +145,13 @@ void KERNEL_FUNCTION_FULL_NAME(filter_final_pass_nlm)(KernelGlobals *kg,
                                                   int* filter_area,
                                                   int* rect);
 
+void KERNEL_FUNCTION_FULL_NAME(filter_divide_combined)(KernelGlobals *kg,
+                                                       int x, int y,
+                                                       int sample,
+                                                       float *buffers,
+                                                       int offset,
+                                                       int stride);
+
 void KERNEL_FUNCTION_FULL_NAME(filter_old_1)(KernelGlobals *kg,
                                              float *denoise_data,
                                              int x, int y,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index cc95a10..c5b9112 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -326,6 +326,20 @@ void KERNEL_FUNCTION_FULL_NAME(filter_final_pass_nlm)(KernelGlobals *kg,
 #endif
 }
 
+void KERNEL_FUNCTION_FULL_NAME(filter_divide_combined)(KernelGlobals *kg,
+                                                       int x, int y,
+                                                       int sample,
+                                                       float *buffers,
+                                                       int offset,
+                                                       int stride)
+{
+#ifdef KERNEL_STUB
+       STUB_ASSERT(KERNEL_ARCH, filter_divide_combined);
+#else
+       kernel_filter_divide_combined(kg, x, y, sample, buffers, offset, stride);
+#endif
+}
+
 void KERNEL_FUNCTION_FULL_NAME(filter_old_1)(KernelGlobals *kg,
                                              float *denoise_data,
                                              int x, int y,

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to