Commit: f928cd357fd6b2e0d6c0af409e0de0f3ce502fa2
Author: Lukas Stockner
Date:   Sun Nov 20 05:15:55 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBf928cd357fd6b2e0d6c0af409e0de0f3ce502fa2

Cycles: Implement Cross-Denoising

Right now this is pretty bad, but it can be made much better.

===================================================================

M       intern/cycles/blender/addon/ui.py
M       intern/cycles/blender/blender_session.cpp
M       intern/cycles/blender/blender_util.h
M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/kernel/kernel_filter.h
M       intern/cycles/kernel/kernel_filter_pre.h
M       intern/cycles/kernel/kernel_passes.h
M       intern/cycles/kernel/kernel_types.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M       intern/cycles/kernel/kernels/cuda/kernel.cu
M       intern/cycles/render/buffers.cpp
M       intern/cycles/render/buffers.h
M       intern/cycles/render/film.cpp
M       intern/cycles/render/film.h
M       source/blender/makesdna/DNA_scene_types.h
M       source/blender/makesrna/intern/rna_scene.c
M       source/blender/render/intern/source/render_result.c

===================================================================

diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index a0b387d..ea07ac2 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -580,6 +580,7 @@ class CyclesRender_PT_denoising(CyclesButtonsPanel, Panel):
         sub.prop(rl, "filter_weighting_adjust", slider=True)
         sub.prop(rl, "filter_gradients")
         sub.prop(rl, "filter_use_nlm_weights")
+        sub.prop(rl, "filter_cross")
 
         sub = col.column(align=True)
         row = sub.row(align=True)
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 7867c0e..4dadc63 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -462,7 +462,9 @@ void BlenderSession::render()
                if(b_layer_iter->denoise_subsurface_direct()) 
scene->film->denoise_flags |= DENOISE_SUBSURFACE_DIR;
                if(b_layer_iter->denoise_subsurface_indirect()) 
scene->film->denoise_flags |= DENOISE_SUBSURFACE_IND;
                scene->film->selective_denoising = (scene->film->denoise_flags 
!= DENOISE_ALL);
+               scene->film->cross_denoising = b_layer_iter->filter_cross();
                buffer_params.selective_denoising = 
scene->film->selective_denoising;
+               buffer_params.cross_denoising = scene->film->cross_denoising;
                scene->integrator->half_window = b_layer_iter->half_window();
                scene->integrator->filter_strength = powf(2.0f, 
b_layer_iter->filter_strength());
                scene->integrator->weighting_adjust = powf(2.0f, 
b_layer_iter->filter_weighting_adjust());
@@ -486,6 +488,10 @@ void BlenderSession::render()
                        add_pass(b_engine, SCE_PASS_DENOISE_SHADOW_B, 3, 
b_rlay_name.c_str(), NULL);
                        add_pass(b_engine, SCE_PASS_DENOISE_NOISY, 3, 
b_rlay_name.c_str(), NULL);
                        add_pass(b_engine, SCE_PASS_DENOISE_NOISY_VAR, 3, 
b_rlay_name.c_str(), NULL);
+                       if(buffer_params.cross_denoising) {
+                               add_pass(b_engine, SCE_PASS_DENOISE_NOISY_B, 3, 
b_rlay_name.c_str(), NULL);
+                               add_pass(b_engine, 
SCE_PASS_DENOISE_NOISY_B_VAR, 3, b_rlay_name.c_str(), NULL);
+                       }
                        if(buffer_params.selective_denoising) {
                                add_pass(b_engine, SCE_PASS_DENOISE_CLEAN, 3, 
b_rlay_name.c_str(), NULL);
                        }
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index f311f15..de9ab2d 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -52,6 +52,8 @@ typedef enum BlenderDenoisePasses {
        SCE_PASS_DENOISE_NOISY            = 40, /* The original noisy image 
(only the components that are denoised). */
        SCE_PASS_DENOISE_NOISY_VAR        = 41,
        SCE_PASS_DENOISE_CLEAN            = 42, /* If present, these image 
components are added to the denoised image. */
+       SCE_PASS_DENOISE_NOISY_B          = 43,
+       SCE_PASS_DENOISE_NOISY_B_VAR      = 44,
 } BlenderDenoisePasses;
 
 void python_thread_state_save(void **python_thread_state);
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 7fb97bf..fcce0ff 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -281,10 +281,12 @@ public:
 
        float* denoise_fill_buffer(KernelGlobals *kg, int sample, int4 rect, 
float** buffers, int* tile_x, int* tile_y, int *offsets, int *strides, int 
frames, int *frame_strides)
        {
+               bool cross_denoise = kg->__data.film.denoise_cross;
                int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
                int pass_stride = w*h*frames;
-               float *filter_buffers = new float[22*pass_stride];
-               memset(filter_buffers, 0, sizeof(float)*22*pass_stride);
+               int passes = cross_denoise? 28:22;
+               float *filter_buffers = new float[passes*pass_stride];
+               memset(filter_buffers, 0, sizeof(float)*passes*pass_stride);
 
 
                for(int frame = 0; frame < frames; frame++) {
@@ -414,13 +416,27 @@ public:
 
                        /* ==== Step 3: Copy combined color pass. ==== */
                        {
-                               int mean_from[]      = {20, 21, 22};
-                               int variance_from[]  = {23, 24, 25};
-                               int offset_to[]      = {16, 18, 20};
-                               for(int i = 0; i < 3; i++) {
-                                       for(int y = rect.y; y < rect.w; y++) {
-                                               for(int x = rect.x; x < rect.z; 
x++) {
-                                                       
filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], 
x, y, tile_x, tile_y, offsets, strides, filter_buffer + 
offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, 
&rect.x);
+                               if(cross_denoise) {
+                                       int mean_from[]      = {20, 21, 22, 26, 
27, 28};
+                                       int variance_from[]  = {23, 24, 25, 29, 
30, 31};
+                                       int offset_to[]      = {16, 18, 20, 22, 
24, 26};
+                                       for(int i = 0; i < 6; i++) {
+                                               for(int y = rect.y; y < rect.w; 
y++) {
+                                                       for(int x = rect.x; x < 
rect.z; x++) {
+                                                               
filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], 
x, y, tile_x, tile_y, offsets, strides, filter_buffer + 
offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, 
&rect.x);
+                                                       }
+                                               }
+                                       }
+                               }
+                               else {
+                                       int mean_from[]      = {20, 21, 22};
+                                       int variance_from[]  = {23, 24, 25};
+                                       int offset_to[]      = {16, 18, 20};
+                                       for(int i = 0; i < 3; i++) {
+                                               for(int y = rect.y; y < rect.w; 
y++) {
+                                                       for(int x = rect.x; x < 
rect.z; x++) {
+                                                               
filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], 
x, y, tile_x, tile_y, offsets, strides, filter_buffer + 
offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, 
&rect.x);
+                                                       }
                                                }
                                        }
                                }
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index edafafa..594ed77 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -456,7 +456,7 @@ ccl_device void kernel_filter_final_pass_wlr(KernelGlobals *kg, int sample, floa
        }
 }
 
-ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, 
float ccl_readonly_ptr buffer, int x, int y, int offset, int stride, float 
*buffers, float ccl_readonly_ptr transform, CUDAFilterStorage *storage, int4 
filter_area, int4 rect, int transform_stride, int localIdx)
+ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, 
float ccl_readonly_ptr buffer, int x, int y, int offset, int stride, float 
*buffers, float ccl_readonly_ptr transform, CUDAFilterStorage *storage, int4 
filter_area, int4 rect, int transform_stride, int localIdx, int weight_ofs, int 
color_ofs)
 {
        __shared__ float 
shared_features[DENOISE_FEATURES*CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH];
        float *features = shared_features + DENOISE_FEATURES*localIdx;
@@ -464,6 +464,8 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
        int buffer_w = align_up(rect.z - rect.x, 4);
        int buffer_h = (rect.w - rect.y);
        int pass_stride = buffer_h * buffer_w * kernel_data.film.num_frames;
+       weight_ofs *= pass_stride;
+       color_ofs *= pass_stride;
        int num_frames = kernel_data.film.num_frames;
        int prev_frames = kernel_data.film.prev_frames;
        /* === Calculate denoising window. === */
@@ -474,8 +476,8 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
        float ccl_readonly_ptr pixel_buffer;
        /* === Get center pixel. === */
        float ccl_readonly_ptr center_buffer = buffer + (y - rect.y) * buffer_w 
+ (x - rect.x);
-       float3 center_color  = filter_get_pixel_color(center_buffer, 
pass_stride);
-       float sqrt_center_variance = 
sqrtf(filter_get_pixel_variance(center_buffer, pass_stride));
+       float3 center_color  = filter_get_pixel_color(center_buffer + 
color_ofs, pass_stride);
+       float sqrt_center_variance = 
sqrtf(filter_get_pixel_variance(center_buffer + color_ofs, pass_stride));
 
        float feature_means[DENOISE_FEATURES];
        filter_get_features(x, y, 0, center_buffer, feature_means, NULL, 
pass_stride);
@@ -502,8 +504,8 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
        math_matrix_zero_lower(XtX, matrix_size);
 
        FOR_PIXEL_WINDOW {
-               float3 color = filter_get_pixel_color(pixel_buffer, 
pass_stride);
-               float variance = filter_get_pixel_variance(pixel_buffer, 
pass_stride);
+               float3 color = filter_get_pixel_color(pixel_buffer + color_ofs, 
pass_stride);
+               float variance = filter_get_pixel_variance(pixel_buffer + 
color_ofs, pass_stride);
                if(filter_firefly_rejection(color, variance, center_color, 
sqrt_center_variance)) {
                        if(cache_idx < WEIGHT_CACHE_SIZE) 
weight_cache[cache_idx] = 0.0f;
                        continue;
@@ -512,7 +514,7 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
                filter_get_features(px, py, pt, pixel_buffer, features, 
feature_means, pass_stride);
                filter_fill_design_row_no_weight_cuda(features, rank, 
design_row, transform, transform_stride);
 
-               float weight = nlm_weight(x, y, px, py, center_buffer, 
pixel_buffer, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, 
rect);
+               float weight = nlm_weight(x, y, px, py, center_buffer + 
weight_ofs, pixel_buffer + weight_ofs, pass_stride, 1.0f, 
kernel_data.integrator.weighting_adjust, 4, rect);
                if(weight < 1e-5f) {
                        if(cache_idx < WEIGHT_CACHE_SIZE) 
weight_cache[cache_idx] = 0.0f;
                        continue;
@@ -559,14 +561,14 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
                if(cache_idx < WEIGHT_CACHE_SIZE) {
                        weight = weight_cache[cache_idx];
                        if(weight == 0.0f) continue;
-                       color = filter_get_pixel_color(pixel_buffer, 
pass_stride);
+                       color = filter_get_pixel_color(pixel_buffer + 
color_ofs, pass_stride);
                }
                else {
-                       color = filter_get_pixel_color(pixel_buffer, 
pass_stride);
-                       float variance = 
filter_get_pixel_variance(pixel_buffer, pass_stride);
+                       color = filter_get_pixel_color(pixel_buffer + 
color_ofs, pass_stride);
+                       float variance = filter_get_pixel_variance(pixel_buffer 
+ color_ofs, pass_stride);
                        if(filter_firefly_rejection(color, variance, 
center_color, sqrt_center_variance)) continue;
 
-                       weight = nlm_weight(x, y, px, py, center_buffer, 
pixel_buffer, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, 
rect);
+                       weight = nlm_weight(x, y, px, py, center_buffer + 
weight_ofs, pixel_buffer + weight_ofs, pass_stride, 1.0f, 
kernel_data.integrator.weighting_adjust, 4, rect);
                        if(weight < 1e-5f) continue;
                        weight /= max(1.0f, varianc

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to