Commit: f487a153359ef2d37aa993f5d294fd6c4e86abdb
Author: Lukas Stockner
Date: Sun Jul 24 02:18:18 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBf487a153359ef2d37aa993f5d294fd6c4e86abdb
Cycles: Use the prefiltered shadow feature for denoising
This commit finally adds the prefiltered shadow feature to the main denoising
algorithm.
Doing so improves detail preservation a lot: although the main focus is sharp
shadow edges, it also helps with Ambient-Occlusion-like and geometric
details.
The only issue is that some geometric edges might be a bit noisier after
denoising, but that will be fixed in the future by downweighting the shadow
feature when the geometric changes (normals and depth features) are strong.
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp
b/intern/cycles/device/device_cpu.cpp
index 4a8acfd..47e977c 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -361,8 +361,8 @@ public:
RenderTile tile;
void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*,
int, int, int, int, int);
- void(*filter_estimate_params_kernel)(KernelGlobals*, int,
float**, int, int, int*, int*, int*, int*, void*, int4);
- void(*filter_final_pass_kernel)(KernelGlobals*, int, float**,
int, int, int*, int*, int*, int*, void*, int4);
+ void(*filter_estimate_params_kernel)(KernelGlobals*, int,
float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
+ void(*filter_final_pass_kernel)(KernelGlobals*, int, float**,
int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
@@ -442,21 +442,26 @@ public:
int offsets[9] = {0, 0, 0, 0,
tile.offset, 0, 0, 0, 0};
int strides[9] = {0, 0, 0, 0,
tile.stride, 0, 0, 0, 0};
float *buffers[9] = {NULL, NULL, NULL,
NULL, (float*) tile.buffer, NULL, NULL, NULL, NULL};
- FilterStorage *storages = new
FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
int overscan =
tile.buffers->params.overscan;
int4 filter_rect = make_int4(tile.x +
overscan, tile.y + overscan, tile.x + tile.w - overscan, tile.y + tile.h -
overscan);
+ int4 prefilter_rect = make_int4(tile.x,
tile.y, tile.x + tile.w, tile.y + tile.h);
+
+ float2* prefiltered =
denoise_prefilter(prefilter_rect, tile, &kg, end_sample, buffers, tile_x,
tile_y, offsets, strides);
+ FilterStorage *storages = new
FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
+
for(int y = filter_rect.y; y <
filter_rect.w; y++) {
for(int x = filter_rect.x; x <
filter_rect.z; x++) {
-
filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y,
offsets, strides, storages, filter_rect);
+
filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y,
offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
}
}
for(int y = filter_rect.y; y <
filter_rect.w; y++) {
for(int x = filter_rect.x; x <
filter_rect.z; x++) {
-
filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y,
offsets, strides, storages, filter_rect);
+
filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y,
offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
}
}
+ delete[] prefiltered;
#ifdef WITH_CYCLES_DEBUG_FILTER
#define WRITE_DEBUG(name, var)
debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y,
name).c_str(), &storages[0].var, tile.buffers->params.final_width,
tile.buffers->params.final_height, sizeof(FilterStorage)/sizeof(float),
tile.buffers->params.final_width);
for(int i = 0; i < DENOISE_FEATURES;
i++) {
@@ -494,16 +499,25 @@ public:
FilterStorage *storages = new
FilterStorage[tile.w*tile.h];
int4 filter_rect = make_int4(tile.x, tile.y,
tile.x + tile.w, tile.y + tile.h);
+ int hw = kg.__data.integrator.half_window;
+ int4 prefilter_rect = make_int4(max(tile.x -
hw, tile_x[0]), max(tile.y - hw, tile_y[0]), min(tile.x + tile.w + hw+1,
tile_x[3]), min(tile.y + tile.h + hw+1, tile_y[3]));
+
+ float2* prefiltered =
denoise_prefilter(prefilter_rect, tile, &kg, sample, buffers, tile_x, tile_y,
offsets, strides);
+
for(int y = filter_rect.y; y < filter_rect.w;
y++) {
for(int x = filter_rect.x; x <
filter_rect.z; x++) {
-
filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y,
offsets, strides, storages, filter_rect);
+
filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y,
offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
}
}
for(int y = filter_rect.y; y < filter_rect.w;
y++) {
for(int x = filter_rect.x; x <
filter_rect.z; x++) {
- filter_final_pass_kernel(&kg,
sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+ filter_final_pass_kernel(&kg,
sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered,
filter_rect, prefilter_rect);
}
}
+ delete[] prefiltered;
+
+
+
#ifdef WITH_CYCLES_DEBUG_FILTER
#define WRITE_DEBUG(name, var)
debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y,
name).c_str(), &storages[0].var, tile.w, tile.h,
sizeof(FilterStorage)/sizeof(float), tile.w);
for(int i = 0; i < DENOISE_FEATURES; i++) {
diff --git a/intern/cycles/kernel/kernel_filter.h
b/intern/cycles/kernel/kernel_filter.h
index 87b360b..dc649bb 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -18,18 +18,21 @@
CCL_NAMESPACE_BEGIN
-#define FOR_PIXEL_WINDOW for(int py = low.y; py < high.y; py++) { \
+#define FOR_PIXEL_WINDOW pre_buffer = prefiltered + (low.y -
prefilter_rect.y)*prefilter_w + (low.x - prefilter_rect.x); \
+ for(int py = low.y; py < high.y; py++) { \
int ytile = (py < tile_y[1])? 0: ((py <
tile_y[2])? 1: 2); \
- for(int px = low.x; px < high.x; px++) { \
- int xtile = (px < tile_x[1])? 0: ((px <
tile_x[2])? 1: 2); \
- int tile = ytile*3+xtile; \
- buffer = buffers[tile] + (offset[tile] +
py*stride[tile] + px)*kernel_data.film.pass_stride +
kernel_data.film.pass_denoising;
+ for(int px = low.x; px < high.x; px++,
pre_buffer++) { \
+ int xtile = (px < tile_x[1])? 0: ((px <
tile_x[2])? 1: 2); \
+ int tile = ytile*3+xtile; \
+ buffer = buffers[tile] + (offset[tile] +
py*stride[tile] + px)*kernel_data.film.pass_stride +
kernel_data.film.pass_denoising;
-#define END_FOR_PIXEL_WINDOW }}
+#define END_FOR_PIXEL_WINDOW } \
+ pre_buffer += prefilter_w - (high.x - low.x); \
+ }
-#define FEATURE_PASSES 7 /* Normals, Albedo, Depth */
+#define FEATURE_PASSES 8 /* Normals, Albedo, Depth */
-ccl_device_inline void filter_get_features(int x, int y, float *buffer, float
sample, float *features, float *mean)
+ccl_device_inline void filter_get_features(int x, int y, float *buffer, float2
*pre_buffer, float sample, float *features, float *mean)
{
float sample_scale = 1.0f/sample;
features[0] = x;
@@ -38,21 +41,22 @@ ccl_device_inline void filter_get_features(int x, int y,
float *buffer, float sa
features[3] = buffer[0] * sample_scale;
features[4] = buffer[1] * sample_scale;
features[5] = buffer[2] * sample_scale;
- features[6] = buffer[6] * sample_scale;
- features[7] = buffer[7] * sample_scale;
- features[8] = buffer[8] * sample_scale;
+ features[6] = pre_buffer->x;
+ features[7] = buffer[6] * sample_scale;
+ features[8] = buffer[7] * sample_scale;
+ features[9] = buffer[8] * sample_scale;
if(mean) {
for(int i = 0; i < DENOISE_FEATURES; i++)
features[i] -= mean[i];
}
#ifdef DENOISE_SECOND_ORDER_SCREEN
- features[9] = features[0]*features[0];
- features[10] = features[1]*features[1];
- features[11] = features[0]*features[1];
+ features[10] = features[0]*features[0];
+ features[11] = features[1]*features[1];
+ features[12] = features[0]*features[1];
#endif
}
-ccl_device_inline void filter_get_feature_variance(int x, int y, float
*buffer, float sample, float *features, float *scale)
+ccl_device_inline void filter_get_feature_variance(int x, int y, float
*buffer, float2 *pre_buffer, float sample, float *features, float *scale)
{
float sample_scale = 1.0f/sample;
float sample_scale_var = 1.0f/(sample - 1.0f);
@@ -62,13 +66,14 @@ ccl_device_inline void filter_get_feature_variance(int x,
int y, float *buffer,
features[3] = saturate(buffer[3] * sample_scale_var) * sample_scale;
features[4] = saturate(buffer[4] * sample_scale_var) * sample_scale;
features[5] = saturate(buffer[5] * sample_scale_var) * sample_scale;
- features[6] = saturate(buffer[9] * sample_scale_var) * sample_scale;
- features[7] = saturate(buffer[10] * sample_scale_var) * sample_scale;
- features[8] = saturate(buffer[11] * sample_scale_var) * sample_scale;
+ features[6] = saturate(pre_buffer->y);
+ features[7] = saturate(buffer[9] * sample_scale_var) * sample_scale;
+ features[8] = saturate(buffer[10] * sample_scale_var) * sample_scale;
+ features[9] = saturate(buffer[11] * sample_scale_var) * sample_scale;
#ifdef DENOISE_SECOND_ORDER_SCREEN
- features[9] = 0.0f;
features[10] = 0.0f;
features[11] = 0.0f;
+ features[12] = 0.0f;
#endif
for(int i = 0; i < DENOISE_FEATURES; i++)
features[i] *= scale[i]*scale[i];
@@ -224,16 +229,19 @@ ccl_device void kernel_filter_combine_halves(int x, int
y, float *mean, float *v
* - Start of the next upper/right neighbor (not accessed)
* buffers contains the nine buffer pointers (y-major ordering, starting with
the lower left tile), offset and stride the respective parameters of the tile.
*/
-ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample,
float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int
*stride, FilterStorage *storage, int4 filter_rect)
+ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample,
float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int
*stride, FilterStorage *storage, float2 *prefiltered, int4 filter_rect, int4
prefilter_rect)
{
storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) +
(x-filter_rect.x);
+ int prefilter_w = (prefilter_rect.z - prefilter_rect.x);
/* Temporary storage, used in different steps of the algorithm. */
float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)],
tempvector[4*DENOISE_FEATURES+1];
float *buffer, features[DENOISE_FEATURES];
+ float2 *pre_buffer;
/* === Get center pixel color and variance. === */
float *center_buffer = buffers[4] + (offset[4] + y*stride[4] + x)*kernel
@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs