Commit: f487a153359ef2d37aa993f5d294fd6c4e86abdb
Author: Lukas Stockner
Date:   Sun Jul 24 02:18:18 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBf487a153359ef2d37aa993f5d294fd6c4e86abdb

Cycles: Use the prefiltered shadow feature for denoising

This commit finally adds the prefiltered shadow feature to the main denoising 
algorithm.
Doing so improves detail preservation a lot: although the main focus is sharp
shadow edges, it also helps with Ambient-Occlusion-like and geometric
details.

The only issue is that some geometric edges might be a bit noisier after
denoising, but that will be fixed in the future by downweighting the shadow
feature when the geometric changes (normals and depth features) are strong.

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/kernel/kernel_filter.h
M       intern/cycles/kernel/kernel_types.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp 
b/intern/cycles/device/device_cpu.cpp
index 4a8acfd..47e977c 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -361,8 +361,8 @@ public:
                RenderTile tile;
 
                void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, 
int, int, int, int, int);
-               void(*filter_estimate_params_kernel)(KernelGlobals*, int, 
float**, int, int, int*, int*, int*, int*, void*, int4);
-               void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, 
int, int, int*, int*, int*, int*, void*, int4);
+               void(*filter_estimate_params_kernel)(KernelGlobals*, int, 
float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
+               void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, 
int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
                if(system_cpu_support_avx2()) {
@@ -442,21 +442,26 @@ public:
                                        int offsets[9] = {0, 0, 0, 0, 
tile.offset, 0, 0, 0, 0};
                                        int strides[9] = {0, 0, 0, 0, 
tile.stride, 0, 0, 0, 0};
                                        float *buffers[9] = {NULL, NULL, NULL, 
NULL, (float*) tile.buffer, NULL, NULL, NULL, NULL};
-                                       FilterStorage *storages = new 
FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
 
                                        int overscan = 
tile.buffers->params.overscan;
                                        int4 filter_rect = make_int4(tile.x + 
overscan, tile.y + overscan, tile.x + tile.w - overscan, tile.y + tile.h - 
overscan);
+                                       int4 prefilter_rect = make_int4(tile.x, 
tile.y, tile.x + tile.w, tile.y + tile.h);
+
+                                       float2* prefiltered = 
denoise_prefilter(prefilter_rect, tile, &kg, end_sample, buffers, tile_x, 
tile_y, offsets, strides);
+                                       FilterStorage *storages = new 
FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
+
                                        for(int y = filter_rect.y; y < 
filter_rect.w; y++) {
                                                for(int x = filter_rect.x; x < 
filter_rect.z; x++) {
-                                                       
filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages, filter_rect);
+                                                       
filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
                                                }
                                        }
                                        for(int y = filter_rect.y; y < 
filter_rect.w; y++) {
                                                for(int x = filter_rect.x; x < 
filter_rect.z; x++) {
-                                                       
filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages, filter_rect);
+                                                       
filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
                                                }
                                        }
 
+                                       delete[] prefiltered;
 #ifdef WITH_CYCLES_DEBUG_FILTER
 #define WRITE_DEBUG(name, var) 
debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, 
name).c_str(), &storages[0].var, tile.buffers->params.final_width, 
tile.buffers->params.final_height, sizeof(FilterStorage)/sizeof(float), 
tile.buffers->params.final_width);
                                        for(int i = 0; i < DENOISE_FEATURES; 
i++) {
@@ -494,16 +499,25 @@ public:
                                FilterStorage *storages = new 
FilterStorage[tile.w*tile.h];
 
                                int4 filter_rect = make_int4(tile.x, tile.y, 
tile.x + tile.w, tile.y + tile.h);
+                               int hw = kg.__data.integrator.half_window;
+                               int4 prefilter_rect = make_int4(max(tile.x - 
hw, tile_x[0]), max(tile.y - hw, tile_y[0]), min(tile.x + tile.w + hw+1, 
tile_x[3]), min(tile.y + tile.h + hw+1, tile_y[3]));
+
+                               float2* prefiltered = 
denoise_prefilter(prefilter_rect, tile, &kg, sample, buffers, tile_x, tile_y, 
offsets, strides);
+
                                for(int y = filter_rect.y; y < filter_rect.w; 
y++) {
                                        for(int x = filter_rect.x; x < 
filter_rect.z; x++) {
-                                               
filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages, filter_rect);
+                                               
filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, 
offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
                                        }
                                }
                                for(int y = filter_rect.y; y < filter_rect.w; 
y++) {
                                        for(int x = filter_rect.x; x < 
filter_rect.z; x++) {
-                                               filter_final_pass_kernel(&kg, 
sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+                                               filter_final_pass_kernel(&kg, 
sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, 
filter_rect, prefilter_rect);
                                        }
                                }
+                               delete[] prefiltered;
+
+
+
 #ifdef WITH_CYCLES_DEBUG_FILTER
 #define WRITE_DEBUG(name, var) 
debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, 
name).c_str(), &storages[0].var, tile.w, tile.h, 
sizeof(FilterStorage)/sizeof(float), tile.w);
                                for(int i = 0; i < DENOISE_FEATURES; i++) {
diff --git a/intern/cycles/kernel/kernel_filter.h 
b/intern/cycles/kernel/kernel_filter.h
index 87b360b..dc649bb 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -18,18 +18,21 @@
 
 CCL_NAMESPACE_BEGIN
 
-#define FOR_PIXEL_WINDOW for(int py = low.y; py < high.y; py++) { \
+#define FOR_PIXEL_WINDOW pre_buffer = prefiltered + (low.y - 
prefilter_rect.y)*prefilter_w + (low.x - prefilter_rect.x); \
+                         for(int py = low.y; py < high.y; py++) { \
                              int ytile = (py < tile_y[1])? 0: ((py < 
tile_y[2])? 1: 2); \
-                                 for(int px = low.x; px < high.x; px++) { \
-                                     int xtile = (px < tile_x[1])? 0: ((px < 
tile_x[2])? 1: 2); \
-                                     int tile = ytile*3+xtile; \
-                                     buffer = buffers[tile] + (offset[tile] + 
py*stride[tile] + px)*kernel_data.film.pass_stride + 
kernel_data.film.pass_denoising;
+                             for(int px = low.x; px < high.x; px++, 
pre_buffer++) { \
+                                 int xtile = (px < tile_x[1])? 0: ((px < 
tile_x[2])? 1: 2); \
+                                 int tile = ytile*3+xtile; \
+                                 buffer = buffers[tile] + (offset[tile] + 
py*stride[tile] + px)*kernel_data.film.pass_stride + 
kernel_data.film.pass_denoising;
 
-#define END_FOR_PIXEL_WINDOW }}
+#define END_FOR_PIXEL_WINDOW } \
+                             pre_buffer += prefilter_w - (high.x - low.x); \
+                         }
 
-#define FEATURE_PASSES 7 /* Normals, Albedo, Depth */
+#define FEATURE_PASSES 8 /* Normals, Albedo, Depth */
 
-ccl_device_inline void filter_get_features(int x, int y, float *buffer, float 
sample, float *features, float *mean)
+ccl_device_inline void filter_get_features(int x, int y, float *buffer, float2 
*pre_buffer, float sample, float *features, float *mean)
 {
        float sample_scale = 1.0f/sample;
        features[0] = x;
@@ -38,21 +41,22 @@ ccl_device_inline void filter_get_features(int x, int y, 
float *buffer, float sa
        features[3] = buffer[0] * sample_scale;
        features[4] = buffer[1] * sample_scale;
        features[5] = buffer[2] * sample_scale;
-       features[6] = buffer[6] * sample_scale;
-       features[7] = buffer[7] * sample_scale;
-       features[8] = buffer[8] * sample_scale;
+       features[6] = pre_buffer->x;
+       features[7] = buffer[6] * sample_scale;
+       features[8] = buffer[7] * sample_scale;
+       features[9] = buffer[8] * sample_scale;
        if(mean) {
                for(int i = 0; i < DENOISE_FEATURES; i++)
                        features[i] -= mean[i];
        }
 #ifdef DENOISE_SECOND_ORDER_SCREEN
-       features[9] = features[0]*features[0];
-       features[10] = features[1]*features[1];
-       features[11] = features[0]*features[1];
+       features[10] = features[0]*features[0];
+       features[11] = features[1]*features[1];
+       features[12] = features[0]*features[1];
 #endif
 }
 
-ccl_device_inline void filter_get_feature_variance(int x, int y, float 
*buffer, float sample, float *features, float *scale)
+ccl_device_inline void filter_get_feature_variance(int x, int y, float 
*buffer, float2 *pre_buffer, float sample, float *features, float *scale)
 {
        float sample_scale = 1.0f/sample;
        float sample_scale_var = 1.0f/(sample - 1.0f);
@@ -62,13 +66,14 @@ ccl_device_inline void filter_get_feature_variance(int x, 
int y, float *buffer,
        features[3] = saturate(buffer[3] * sample_scale_var) * sample_scale;
        features[4] = saturate(buffer[4] * sample_scale_var) * sample_scale;
        features[5] = saturate(buffer[5] * sample_scale_var) * sample_scale;
-       features[6] = saturate(buffer[9] * sample_scale_var) * sample_scale;
-       features[7] = saturate(buffer[10] * sample_scale_var) * sample_scale;
-       features[8] = saturate(buffer[11] * sample_scale_var) * sample_scale;
+       features[6] = saturate(pre_buffer->y);
+       features[7] = saturate(buffer[9] * sample_scale_var) * sample_scale;
+       features[8] = saturate(buffer[10] * sample_scale_var) * sample_scale;
+       features[9] = saturate(buffer[11] * sample_scale_var) * sample_scale;
 #ifdef DENOISE_SECOND_ORDER_SCREEN
-       features[9] = 0.0f;
        features[10] = 0.0f;
        features[11] = 0.0f;
+       features[12] = 0.0f;
 #endif
        for(int i = 0; i < DENOISE_FEATURES; i++)
                features[i] *= scale[i]*scale[i];
@@ -224,16 +229,19 @@ ccl_device void kernel_filter_combine_halves(int x, int 
y, float *mean, float *v
  * - Start of the next upper/right neighbor (not accessed)
  * buffers contains the nine buffer pointers (y-major ordering, starting with 
the lower left tile), offset and stride the respective parameters of the tile.
  */
-ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, 
float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int 
*stride, FilterStorage *storage, int4 filter_rect)
+ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, 
float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int 
*stride, FilterStorage *storage, float2 *prefiltered, int4 filter_rect, int4 
prefilter_rect)
 {
        storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + 
(x-filter_rect.x);
+       int prefilter_w = (prefilter_rect.z - prefilter_rect.x);
 
        /* Temporary storage, used in different steps of the algorithm. */
        float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], 
tempvector[4*DENOISE_FEATURES+1];
        float *buffer, features[DENOISE_FEATURES];
+       float2 *pre_buffer;
 
        /* === Get center pixel color and variance. === */
        float *center_buffer = buffers[4] + (offset[4] + y*stride[4] + x)*kernel

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to