[Bf-blender-cvs] [a0d6ca39dd] soc-2016-cycles_denoising: Cycles Denoising: Redesign debug EXR writing code for a cleaner integration

Lukas Stockner Tue, 31 Jan 2017 20:19:47 -0800

Commit: a0d6ca39dd29c69344792cd4a33eb92bcc76235c
Author: Lukas Stockner
Date:   Sat Jan 14 18:57:03 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBa0d6ca39dd29c69344792cd4a33eb92bcc76235c


Cycles Denoising: Redesign debug EXR writing code for a cleaner integration

Also, remove the denoising passes from CUDA - it just adds too much clutter and the 
results are the same as on the CPU anyway.

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/device/device_cuda.cpp
M       intern/cycles/util/util_debug.cpp
M       intern/cycles/util/util_debug.h
M       intern/cycles/util/util_guarded_allocator.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp 
b/intern/cycles/device/device_cpu.cpp
index 2e7844ca99..26ec530cf1 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -329,10 +329,7 @@ public:
                        for(int i = 0; i < 9; i++) {
                                buffer[i] = buffers[i] + frame_strides[i]*frame;
                        }
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                       DenoiseDebug debug((rect.z - rect.x), h, 34);
-#endif
-
+                       DebugPasses debug((rect.z - rect.x), h, 34, 1, w);
 
 #define PASSPTR(i) (filter_buffer + (i)*pass_stride)
 
@@ -349,28 +346,21 @@ public:
                                                
filter_divide_shadow_kernel()(kg, sample, buffer, x, y, tile_x, tile_y, 
offsets, strides, unfilteredA, sampleV, sampleVV, bufferV, &rect.x);
                                        }
                                }
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("shadow_%s", 
name), var, 1, w);
-                               WRITE_DEBUG("unfilteredA", unfilteredA);
-                               WRITE_DEBUG("unfilteredB", unfilteredB);
-                               WRITE_DEBUG("bufferV", bufferV);
-                               WRITE_DEBUG("sampleV", sampleV);
-                               WRITE_DEBUG("sampleVV", sampleVV);
-#endif
+                               debug.add_pass("shadowUnfilteredA", 
unfilteredA);
+                               debug.add_pass("shadowUnfilteredB", 
unfilteredB);
+                               debug.add_pass("shadowBufferV", bufferV);
+                               debug.add_pass("shadowSampleV", sampleV);
+                               debug.add_pass("shadowSampleVV", sampleVV);
 
                                /* Smooth the (generally pretty noisy) buffer 
variance using the spatial information from the sample variance. */
                                non_local_means(rect, bufferV, sampleV, cleanV, 
sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 6, 3, 4.0f, 1.0f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               WRITE_DEBUG("cleanV", cleanV);
-#endif
+                               debug.add_pass("shadowCleanV", cleanV);
 
                                /* Use the smoothed variance to filter the two 
shadow half images using each other for weight calculation. */
                                non_local_means(rect, unfilteredA, unfilteredB, 
sampleV, cleanV, nlm_temp1, nlm_temp2, nlm_temp3, 5, 3, 1.0f, 0.25f);
                                non_local_means(rect, unfilteredB, unfilteredA, 
bufferV, cleanV, nlm_temp1, nlm_temp2, nlm_temp3, 5, 3, 1.0f, 0.25f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               WRITE_DEBUG("filteredA", sampleV);
-                               WRITE_DEBUG("filteredB", bufferV);
-#endif
+                               debug.add_pass("shadowFilteredA", sampleV);
+                               debug.add_pass("shadowFilteredB", bufferV);
 
                                /* Estimate the residual variance between the 
two filtered halves. */
                                for(int y = rect.y; y < rect.w; y++) {
@@ -378,17 +368,13 @@ public:
                                                
filter_combine_halves_kernel()(x, y, NULL, sampleVV, sampleV, bufferV, &rect.x, 
2);
                                        }
                                }
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               WRITE_DEBUG("residualV", sampleVV);
-#endif
+                               debug.add_pass("shadowResidualV", sampleVV);
 
                                /* Use the residual variance for a second 
filter pass. */
                                non_local_means(rect, sampleV, bufferV, 
unfilteredA, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 4, 2, 1.0f, 0.5f);
                                non_local_means(rect, bufferV, sampleV, 
unfilteredB, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 4, 2, 1.0f, 0.5f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               WRITE_DEBUG("finalA", unfilteredA);
-                               WRITE_DEBUG("finalB", unfilteredB);
-#endif
+                               debug.add_pass("shadowFinalA", unfilteredA);
+                               debug.add_pass("shadowFinalB", unfilteredB);
 
                                /* Combine the two double-filtered halves to a 
final shadow feature image and associated variance. */
                                for(int y = rect.y; y < rect.w; y++) {
@@ -396,12 +382,8 @@ public:
                                                
filter_combine_halves_kernel()(x, y, PASSPTR(8), PASSPTR(9), unfilteredA, 
unfilteredB, &rect.x, 0);
                                        }
                                }
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               WRITE_DEBUG("final", PASSPTR(8));
-                               WRITE_DEBUG("finalV", PASSPTR(9));
-                               debug.write(string_printf("debugf_%dx%d.exr", 
tile_x[1], tile_y[1]));
-#undef WRITE_DEBUG
-#endif
+                               debug.add_pass("shadowFinal", PASSPTR(8));
+                               debug.add_pass("shadowFinalV", PASSPTR(9));
                        }
 
                        /* ==== Step 2: Prefilter general features. ==== */
@@ -429,13 +411,9 @@ public:
                                                }
                                        }
                                        non_local_means(rect, unfiltered, 
unfiltered, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+1), nlm_temp1, 
nlm_temp2, nlm_temp3, 2, 2, 1, 0.25f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("f%d_%s", i, 
name), var, 1, w);
-                                       WRITE_DEBUG("unfiltered", unfiltered);
-                                       WRITE_DEBUG("sampleV", 
PASSPTR(offset_to[i]+1));
-                                       WRITE_DEBUG("filtered", 
PASSPTR(offset_to[i]));
-#undef WRITE_DEBUG
-#endif
+                                       
debug.add_pass(string_printf("feature%dUnfiltered", i), unfiltered);
+                                       
debug.add_pass(string_printf("feature%dFiltered", i), PASSPTR(offset_to[i]));
+                                       
debug.add_pass(string_printf("feature%dVariance", i), PASSPTR(offset_to[i]+1));
                                }
                        }
 
@@ -468,6 +446,8 @@ public:
                                        }
                                }
                        }
+
+                       debug.write(string_printf("debug_tile_%d_%d.exr", 
rect.x, rect.y));
                }
 
                return filter_buffers;
diff --git a/intern/cycles/device/device_cuda.cpp 
b/intern/cycles/device/device_cuda.cpp
index 8ec6ca6b91..4659b1dc01 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -993,33 +993,12 @@ public:
                                                           xthreads, ythreads, 
1, /* threads */
                                                           0, 0, divide_args, 
0));
 
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, ptr) 
debug_write_pfm(string_printf("debug_%dx%d_cuda_shadow_%s.pfm", 
rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, 
name).c_str(), ptr, rtile.w, rtile.h, 1, w)
-                               float *temp = new float[pass_stride*6];
-                               cuda_assert(cuMemcpyDtoH(temp, d_sampleV, 
6*pass_stride*sizeof(float)));
-
-                               WRITE_DEBUG("unfilteredA", temp + 
4*pass_stride);
-                               WRITE_DEBUG("unfilteredB", temp + 
5*pass_stride);
-                               WRITE_DEBUG("bufferV", temp + 2*pass_stride);
-                               WRITE_DEBUG("sampleV", temp + 0*pass_stride);
-                               WRITE_DEBUG("sampleVV", temp + 1*pass_stride);
-#endif
-
                                /* Smooth the (generally pretty noisy) buffer 
variance using the spatial information from the sample variance. */
                                non_local_means(rect, d_bufferV, d_sampleV, 
d_cleanV, d_sampleVV, d_temp1, d_temp2, d_temp3, 6, 3, 2.0f, 2.0f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               cuda_assert(cuMemcpyDtoH(temp, d_cleanV, 
pass_stride*sizeof(float)));
-                               WRITE_DEBUG("cleanV", temp);
-#endif
 
                                /* Use the smoothed variance to filter the two 
shadow half images using each other for weight calculation. */
                                non_local_means(rect, d_unfilteredA, 
d_unfilteredB, d_sampleV, d_cleanV, d_temp1, d_temp2, d_temp3, 5, 3, 1.0f, 
0.25f);
                                non_local_means(rect, d_unfilteredB, 
d_unfilteredA, d_bufferV, d_cleanV, d_temp1, d_temp2, d_temp3, 5, 3, 1.0f, 
0.25f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               cuda_assert(cuMemcpyDtoH(temp, d_sampleV, 
3*pass_stride*sizeof(float)));
-                               WRITE_DEBUG("filteredA", temp);
-                               WRITE_DEBUG("filteredB", temp + 2*pass_stride);
-#endif
 
                                /* Estimate the residual variance between the 
two filtered halves. */
                                int var_r = 2;
@@ -1029,19 +1008,10 @@ public:
                                                           xblocks , yblocks, 
1, /* blocks */
                                                           xthreads, ythreads, 
1, /* threads */
                                                           0, 0, 
residual_variance_args, 0));
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               cuda_assert(cuMemcpyDtoH(temp, d_cleanV, 
pass_stride*sizeof(float)));
-                               WRITE_DEBUG("residualV", temp);
-#endif
 
                                /* Use the residual variance for a second 
filter pass. */
                                non_local_means(rect, d_sampleV, d_bufferV, 
d_unfilteredA, d_cleanV, d_temp1, d_temp2, d_temp3, 4, 2, 1.0f, 1.0f);
                                non_local_means(rect, d_bufferV, d_sampleV, 
d_unfilteredB, d_cleanV, d_temp1, d_temp2, d_temp3, 4, 2, 1.0f, 1.0f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               cuda_assert(cuMemcpyDtoH(temp, d_unfilteredA, 
2*pass_stride*sizeof(float)));
-                               WRITE_DEBUG("finalA", temp);
-                               WRITE_DEBUG("finalB", temp + 1*pass_stride);
-#endif
 
                                /* Combine the two double-filtered halves to a 
final shadow feature image and associated variance. */
                                var_r = 0;
@@ -1053,13 +1023,6 @@ public:
                                                           xthreads, ythreads, 
1, /* threads */
                                                           0, 0, 
final_prefiltered_args, 0));
                                cuda_assert(cuCtxSynchronize());
-#ifdef WITH_CYCLES_DEBUG_FILTER
-                               cuda_assert(cuMemcpyDtoH(temp, d_mean, 
2*pass_stride*sizeof(float)));
-                               WRITE_DEBUG("final", temp);
-                               WRITE_DEBUG("finalV", temp + 1*pass_stride);
-                               delete[] temp;
-#undef WRITE_DEBUG
-#endif
                        }
 
                        /* ==== Step 2: Prefilter general features. ==== */
@@ -1113,18 +1076,6 @@ public:
                        }
                }
 
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, pass) 
debug_write_pfm(string_printf("debug_%dx%d_cuda_feature%d_%s.pfm", 
rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, 
i, name).c_str(), host_denoise_buffer+pass*pass_stride, rtile.w, rtile.h, 1, w)
-               float *host_denoise_buffer = new float[22*pass_stride];
-               cuda_assert(cuMemcpyDtoH(host_denoise_buffer, 
d_denoise_buffers, 22*pass_stride*sizeof(float)));
-               for(int i = 0; i < 8; i++) {
-                       WRITE_DEBUG("filtered", 2*i);
-                       WRITE_DEBUG("variance", 2*i+1);
-               }
-               delete[] host_denoise_buffer;
-#undef WRITE_DEBUG
-#endif
-
                /* Use the prefiltered feature to denoise the image. */
                int storage_num = filter_area.z*filter_area.w;
                CUdeviceptr d_storage, d_transforms;
@@ -1226,26 +1177,6 @@ public:
                                           0, 0, finalize_args, 0));
                cuda_assert(cuMemFree(d_XtWX));
                cuda_assert(cuMemFree(d_XtWY));
-
-#ifdef WITH_CYCLES_DEBUG_FILTER
-               CUDAFilterStorage *host_storage = new 
CUDAFilterStorage[filter_area.z*filter_area.w];
-               cuda_assert(cuMemcpyDtoH(host_storage, d_storage, 
sizeof(CUDAFilterStorage)*filter_area.z*filter_area.w));
-#define WRITE_DEBUG(name, var) 
debug_write_pfm(string_printf("debug_%dx%d_cuda_%s.pfm", 
rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, 
name).c_str(), &host_storage[0].var, filter_area.z, filter_area.w, 
sizeof(CUDAFilterStorage)/sizeof(float), filter_area.z);
-               for(int i = 0; i < DENOISE_FEATURES; i++) {
-                       WRITE_DEBUG(string_printf("mean_%d", i).c_str(), 
means[i]);
-                       WRITE_DEBUG(string_printf("scale_%d", i).c_str(), 
scales[i]);
-                       WRITE_DEBUG(string_printf("singular_%d", i).c_str(), 
singular[i]);
-                       WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), 
bandwidth[i]);
-               }
-               WRITE_DEBUG("singula

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [a0d6ca39dd] soc-2016-cycles_denoising: Cycles Denoising: Redesign debug EXR writing code for a cleaner integration

Reply via email to