Commit: a0d6ca39dd29c69344792cd4a33eb92bcc76235c Author: Lukas Stockner Date: Sat Jan 14 18:57:03 2017 +0100 Branches: soc-2016-cycles_denoising https://developer.blender.org/rBa0d6ca39dd29c69344792cd4a33eb92bcc76235c
Cycles Denoising: Redesign debug EXR writing code for a cleaner integration Also, remove the denoising passes from CUDA - just adds too much clutter and the results are the same as on the CPU anyways. =================================================================== M intern/cycles/device/device_cpu.cpp M intern/cycles/device/device_cuda.cpp M intern/cycles/util/util_debug.cpp M intern/cycles/util/util_debug.h M intern/cycles/util/util_guarded_allocator.h =================================================================== diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 2e7844ca99..26ec530cf1 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -329,10 +329,7 @@ public: for(int i = 0; i < 9; i++) { buffer[i] = buffers[i] + frame_strides[i]*frame; } -#ifdef WITH_CYCLES_DEBUG_FILTER - DenoiseDebug debug((rect.z - rect.x), h, 34); -#endif - + DebugPasses debug((rect.z - rect.x), h, 34, 1, w); #define PASSPTR(i) (filter_buffer + (i)*pass_stride) @@ -349,28 +346,21 @@ public: filter_divide_shadow_kernel()(kg, sample, buffer, x, y, tile_x, tile_y, offsets, strides, unfilteredA, sampleV, sampleVV, bufferV, &rect.x); } } -#ifdef WITH_CYCLES_DEBUG_FILTER -#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("shadow_%s", name), var, 1, w); - WRITE_DEBUG("unfilteredA", unfilteredA); - WRITE_DEBUG("unfilteredB", unfilteredB); - WRITE_DEBUG("bufferV", bufferV); - WRITE_DEBUG("sampleV", sampleV); - WRITE_DEBUG("sampleVV", sampleVV); -#endif + debug.add_pass("shadowUnfilteredA", unfilteredA); + debug.add_pass("shadowUnfilteredB", unfilteredB); + debug.add_pass("shadowBufferV", bufferV); + debug.add_pass("shadowSampleV", sampleV); + debug.add_pass("shadowSampleVV", sampleVV); /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. 
*/ non_local_means(rect, bufferV, sampleV, cleanV, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 6, 3, 4.0f, 1.0f); -#ifdef WITH_CYCLES_DEBUG_FILTER - WRITE_DEBUG("cleanV", cleanV); -#endif + debug.add_pass("shadowCleanV", cleanV); /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */ non_local_means(rect, unfilteredA, unfilteredB, sampleV, cleanV, nlm_temp1, nlm_temp2, nlm_temp3, 5, 3, 1.0f, 0.25f); non_local_means(rect, unfilteredB, unfilteredA, bufferV, cleanV, nlm_temp1, nlm_temp2, nlm_temp3, 5, 3, 1.0f, 0.25f); -#ifdef WITH_CYCLES_DEBUG_FILTER - WRITE_DEBUG("filteredA", sampleV); - WRITE_DEBUG("filteredB", bufferV); -#endif + debug.add_pass("shadowFilteredA", sampleV); + debug.add_pass("shadowFilteredB", bufferV); /* Estimate the residual variance between the two filtered halves. */ for(int y = rect.y; y < rect.w; y++) { @@ -378,17 +368,13 @@ public: filter_combine_halves_kernel()(x, y, NULL, sampleVV, sampleV, bufferV, &rect.x, 2); } } -#ifdef WITH_CYCLES_DEBUG_FILTER - WRITE_DEBUG("residualV", sampleVV); -#endif + debug.add_pass("shadowResidualV", sampleVV); /* Use the residual variance for a second filter pass. */ non_local_means(rect, sampleV, bufferV, unfilteredA, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 4, 2, 1.0f, 0.5f); non_local_means(rect, bufferV, sampleV, unfilteredB, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 4, 2, 1.0f, 0.5f); -#ifdef WITH_CYCLES_DEBUG_FILTER - WRITE_DEBUG("finalA", unfilteredA); - WRITE_DEBUG("finalB", unfilteredB); -#endif + debug.add_pass("shadowFinalA", unfilteredA); + debug.add_pass("shadowFinalB", unfilteredB); /* Combine the two double-filtered halves to a final shadow feature image and associated variance. 
*/ for(int y = rect.y; y < rect.w; y++) { @@ -396,12 +382,8 @@ public: filter_combine_halves_kernel()(x, y, PASSPTR(8), PASSPTR(9), unfilteredA, unfilteredB, &rect.x, 0); } } -#ifdef WITH_CYCLES_DEBUG_FILTER - WRITE_DEBUG("final", PASSPTR(8)); - WRITE_DEBUG("finalV", PASSPTR(9)); - debug.write(string_printf("debugf_%dx%d.exr", tile_x[1], tile_y[1])); -#undef WRITE_DEBUG -#endif + debug.add_pass("shadowFinal", PASSPTR(8)); + debug.add_pass("shadowFinalV", PASSPTR(9)); } /* ==== Step 2: Prefilter general features. ==== */ @@ -429,13 +411,9 @@ public: } } non_local_means(rect, unfiltered, unfiltered, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+1), nlm_temp1, nlm_temp2, nlm_temp3, 2, 2, 1, 0.25f); -#ifdef WITH_CYCLES_DEBUG_FILTER -#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("f%d_%s", i, name), var, 1, w); - WRITE_DEBUG("unfiltered", unfiltered); - WRITE_DEBUG("sampleV", PASSPTR(offset_to[i]+1)); - WRITE_DEBUG("filtered", PASSPTR(offset_to[i])); -#undef WRITE_DEBUG -#endif + debug.add_pass(string_printf("feature%dUnfiltered", i), unfiltered); + debug.add_pass(string_printf("feature%dFiltered", i), PASSPTR(offset_to[i])); + debug.add_pass(string_printf("feature%dVariance", i), PASSPTR(offset_to[i]+1)); } } @@ -468,6 +446,8 @@ public: } } } + + debug.write(string_printf("debug_tile_%d_%d.exr", rect.x, rect.y)); } return filter_buffers; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 8ec6ca6b91..4659b1dc01 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -993,33 +993,12 @@ public: xthreads, ythreads, 1, /* threads */ 0, 0, divide_args, 0)); -#ifdef WITH_CYCLES_DEBUG_FILTER -#define WRITE_DEBUG(name, ptr) debug_write_pfm(string_printf("debug_%dx%d_cuda_shadow_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), ptr, rtile.w, rtile.h, 1, w) - float *temp = new float[pass_stride*6]; - cuda_assert(cuMemcpyDtoH(temp, 
d_sampleV, 6*pass_stride*sizeof(float))); - - WRITE_DEBUG("unfilteredA", temp + 4*pass_stride); - WRITE_DEBUG("unfilteredB", temp + 5*pass_stride); - WRITE_DEBUG("bufferV", temp + 2*pass_stride); - WRITE_DEBUG("sampleV", temp + 0*pass_stride); - WRITE_DEBUG("sampleVV", temp + 1*pass_stride); -#endif - /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */ non_local_means(rect, d_bufferV, d_sampleV, d_cleanV, d_sampleVV, d_temp1, d_temp2, d_temp3, 6, 3, 2.0f, 2.0f); -#ifdef WITH_CYCLES_DEBUG_FILTER - cuda_assert(cuMemcpyDtoH(temp, d_cleanV, pass_stride*sizeof(float))); - WRITE_DEBUG("cleanV", temp); -#endif /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */ non_local_means(rect, d_unfilteredA, d_unfilteredB, d_sampleV, d_cleanV, d_temp1, d_temp2, d_temp3, 5, 3, 1.0f, 0.25f); non_local_means(rect, d_unfilteredB, d_unfilteredA, d_bufferV, d_cleanV, d_temp1, d_temp2, d_temp3, 5, 3, 1.0f, 0.25f); -#ifdef WITH_CYCLES_DEBUG_FILTER - cuda_assert(cuMemcpyDtoH(temp, d_sampleV, 3*pass_stride*sizeof(float))); - WRITE_DEBUG("filteredA", temp); - WRITE_DEBUG("filteredB", temp + 2*pass_stride); -#endif /* Estimate the residual variance between the two filtered halves. */ int var_r = 2; @@ -1029,19 +1008,10 @@ public: xblocks , yblocks, 1, /* blocks */ xthreads, ythreads, 1, /* threads */ 0, 0, residual_variance_args, 0)); -#ifdef WITH_CYCLES_DEBUG_FILTER - cuda_assert(cuMemcpyDtoH(temp, d_cleanV, pass_stride*sizeof(float))); - WRITE_DEBUG("residualV", temp); -#endif /* Use the residual variance for a second filter pass. 
*/ non_local_means(rect, d_sampleV, d_bufferV, d_unfilteredA, d_cleanV, d_temp1, d_temp2, d_temp3, 4, 2, 1.0f, 1.0f); non_local_means(rect, d_bufferV, d_sampleV, d_unfilteredB, d_cleanV, d_temp1, d_temp2, d_temp3, 4, 2, 1.0f, 1.0f); -#ifdef WITH_CYCLES_DEBUG_FILTER - cuda_assert(cuMemcpyDtoH(temp, d_unfilteredA, 2*pass_stride*sizeof(float))); - WRITE_DEBUG("finalA", temp); - WRITE_DEBUG("finalB", temp + 1*pass_stride); -#endif /* Combine the two double-filtered halves to a final shadow feature image and associated variance. */ var_r = 0; @@ -1053,13 +1023,6 @@ public: xthreads, ythreads, 1, /* threads */ 0, 0, final_prefiltered_args, 0)); cuda_assert(cuCtxSynchronize()); -#ifdef WITH_CYCLES_DEBUG_FILTER - cuda_assert(cuMemcpyDtoH(temp, d_mean, 2*pass_stride*sizeof(float))); - WRITE_DEBUG("final", temp); - WRITE_DEBUG("finalV", temp + 1*pass_stride); - delete[] temp; -#undef WRITE_DEBUG -#endif } /* ==== Step 2: Prefilter general features. ==== */ @@ -1113,18 +1076,6 @@ public: } } -#ifdef WITH_CYCLES_DEBUG_FILTER -#define WRITE_DEBUG(name, pass) debug_write_pfm(string_printf("debug_%dx%d_cuda_feature%d_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, i, name).c_str(), host_denoise_buffer+pass*pass_stride, rtile.w, rtile.h, 1, w) - float *host_denoise_buffer = new float[22*pass_stride]; - cuda_assert(cuMemcpyDtoH(host_denoise_buffer, d_denoise_buffers, 22*pass_stride*sizeof(float))); - for(int i = 0; i < 8; i++) { - WRITE_DEBUG("filtered", 2*i); - WRITE_DEBUG("variance", 2*i+1); - } - delete[] host_denoise_buffer; -#undef WRITE_DEBUG -#endif - /* Use the prefiltered feature to denoise the image. 
*/ int storage_num = filter_area.z*filter_area.w; CUdeviceptr d_storage, d_transforms; @@ -1226,26 +1177,6 @@ public: 0, 0, finalize_args, 0)); cuda_assert(cuMemFree(d_XtWX)); cuda_assert(cuMemFree(d_XtWY)); - -#ifdef WITH_CYCLES_DEBUG_FILTER - CUDAFilterStorage *host_storage = new CUDAFilterStorage[filter_area.z*filter_area.w]; - cuda_assert(cuMemcpyDtoH(host_storage, d_storage, sizeof(CUDAFilterStorage)*filter_area.z*filter_area.w)); -#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_cuda_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), &host_storage[0].var, filter_area.z, filter_area.w, sizeof(CUDAFilterStorage)/sizeof(float), filter_area.z); - for(int i = 0; i < DENOISE_FEATURES; i++) { - WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]); - WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]); - WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]); - WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]); - } - WRITE_DEBUG("singula @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs