Commit: 3ea9646c1802016d23f3e6b69e06570c0c9b0cc0 Author: Sergey Sharybin Date: Tue Jul 6 13:00:57 2021 +0200 Branches: cycles-x https://developer.blender.org/rB3ea9646c1802016d23f3e6b69e06570c0c9b0cc0
Cycles X: Reduce OIDN memory usage with multi-device render Allow OIDN to modify render buffers in-place, without allocating extra temporary buffers. Currently memory is only saved for non-composited passes (combined, shadow catcher matte). It is possible to avoid the memory allocation for the composited passes as well, but that requires passing the row stride to the pass accessor, which is not yet possible. Differential Revision: https://developer.blender.org/D11826 =================================================================== M intern/cycles/integrator/denoiser.h M intern/cycles/integrator/denoiser_device.cpp M intern/cycles/integrator/denoiser_device.h M intern/cycles/integrator/denoiser_oidn.cpp M intern/cycles/integrator/denoiser_oidn.h M intern/cycles/integrator/path_trace.cpp =================================================================== diff --git a/intern/cycles/integrator/denoiser.h b/intern/cycles/integrator/denoiser.h index cac799f02ca..5870da694f9 100644 --- a/intern/cycles/integrator/denoiser.h +++ b/intern/cycles/integrator/denoiser.h @@ -70,10 +70,16 @@ class Denoiser { * * The `num_samples` corresponds to the number of samples in the render buffers. It is used * to scale buffers down to the "final" value in algorithms which don't do automatic exposure, - * or which needs "final" value for data passes. */ + * or which needs "final" value for data passes. + * + * The `allow_inplace_modification` means that the denoiser is allowed to do in-place + * modification of the input passes (scaling them down i.e.). This will lower the memory + * footprint of the denoiser but will make input passes "invalid" (from path tracer) point of + * view. */ virtual void denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, - const int num_samples) = 0; + const int num_samples, + bool allow_inplace_modification) = 0; /* Get a device which is used to perform actual denoising. 
* diff --git a/intern/cycles/integrator/denoiser_device.cpp b/intern/cycles/integrator/denoiser_device.cpp index 186b1f690b4..6d472f9883e 100644 --- a/intern/cycles/integrator/denoiser_device.cpp +++ b/intern/cycles/integrator/denoiser_device.cpp @@ -38,8 +38,12 @@ DeviceDenoiser::~DeviceDenoiser() void DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, - const int num_samples) + const int num_samples, + bool allow_inplace_modification) { + /* TODO(sergey): Support in-place modification to lower memory footprint. */ + (void)allow_inplace_modification; + Device *denoiser_device = get_denoiser_device(); if (!denoiser_device) { return; diff --git a/intern/cycles/integrator/denoiser_device.h b/intern/cycles/integrator/denoiser_device.h index 2bf6cb48172..282cee2bfe3 100644 --- a/intern/cycles/integrator/denoiser_device.h +++ b/intern/cycles/integrator/denoiser_device.h @@ -33,7 +33,8 @@ class DeviceDenoiser : public Denoiser { virtual void denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, - const int num_samples) override; + const int num_samples, + bool allow_inplace_modification) override; }; CCL_NAMESPACE_END diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp index 72bd7cfec61..9f3e4b0b1de 100644 --- a/intern/cycles/integrator/denoiser_oidn.cpp +++ b/intern/cycles/integrator/denoiser_oidn.cpp @@ -145,6 +145,9 @@ class OIDNPass { /* For the scaled passes, the data which holds values of scaled pixels. */ array<float> scaled_buffer; + + /* For the in-place usable passes denotes whether the underlying data has been scaled. 
*/ + bool is_scaled = false; }; class OIDNDenoiseContext { @@ -153,12 +156,14 @@ class OIDNDenoiseContext { const BufferParams &buffer_params, RenderBuffers *render_buffers, oidn::FilterRef *oidn_filter, - const int num_samples) + const int num_samples, + const bool allow_inplace_modification) : denoise_params_(denoise_params), buffer_params_(buffer_params), render_buffers_(render_buffers), oidn_filter_(oidn_filter), num_samples_(num_samples), + allow_inplace_modification_(allow_inplace_modification), pass_sample_count_(buffer_params_.get_pass_offset(PASS_SAMPLE_COUNT)) { if (denoise_params_.use_pass_albedo) { @@ -285,6 +290,7 @@ class OIDNDenoiseContext { void set_pass(OIDNPass &oidn_pass) { if (oidn_pass.use_compositing) { + /* TODO(sergey): Avoid extra memory for compositing passes. */ set_pass_scaled(oidn_pass); return; } @@ -297,6 +303,12 @@ class OIDNDenoiseContext { return; } + if (allow_inplace_modification_) { + set_pass_referenced(oidn_pass); + scale_pass_if_needed(oidn_pass); + return; + } + set_pass_scaled(oidn_pass); } @@ -372,11 +384,55 @@ class OIDNDenoiseContext { } } + void scale_pass_if_needed(OIDNPass &oidn_pass) + { + if (!oidn_pass.need_scale) { + return; + } + if (oidn_pass.is_scaled) { + return; + } + oidn_pass.is_scaled = true; + + const int64_t x = buffer_params_.full_x; + const int64_t y = buffer_params_.full_y; + const int64_t width = buffer_params_.width; + const int64_t height = buffer_params_.height; + const int64_t offset = buffer_params_.offset; + const int64_t stride = buffer_params_.stride; + const int64_t pass_stride = buffer_params_.pass_stride; + const int64_t row_stride = stride * pass_stride; + + const int64_t pixel_offset = offset + x + y * stride; + const int64_t buffer_offset = (pixel_offset * pass_stride); + + float *buffer_data = render_buffers_->buffer.data(); + + const bool has_pass_sample_count = (pass_sample_count_ != PASS_UNUSED); + + for (int y = 0; y < height; ++y) { + float *buffer_row = buffer_data + 
buffer_offset + y * row_stride; + for (int x = 0; x < width; ++x) { + float *buffer_pixel = buffer_row + x * pass_stride; + float *pass_pixel = buffer_pixel + oidn_pass.offset; + + const float pixel_scale = 1.0f / (has_pass_sample_count ? + __float_as_uint(buffer_pixel[pass_sample_count_]) : + num_samples_); + + pass_pixel[0] = pass_pixel[0] * pixel_scale; + pass_pixel[1] = pass_pixel[1] * pixel_scale; + pass_pixel[2] = pass_pixel[2] * pixel_scale; + } + } + } + const DenoiseParams &denoise_params_; const BufferParams &buffer_params_; RenderBuffers *render_buffers_; oidn::FilterRef *oidn_filter_; int num_samples_; + bool allow_inplace_modification_; int pass_sample_count_; /* Optional albedo and normal passes, reused by denoising of different pass types. */ @@ -389,7 +445,8 @@ class OIDNDenoiseContext { void OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, - const int num_samples) + const int num_samples, + bool allow_inplace_modification) { thread_scoped_lock lock(mutex_); @@ -399,7 +456,12 @@ void OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params, #ifdef WITH_OPENIMAGEDENOISE oidn::FilterRef *oidn_filter = &state_->oidn_filter; - OIDNDenoiseContext context(params_, buffer_params, render_buffers, oidn_filter, num_samples); + OIDNDenoiseContext context(params_, + buffer_params, + render_buffers, + oidn_filter, + num_samples, + allow_inplace_modification); context.denoise(PASS_COMBINED); context.denoise(PASS_SHADOW_CATCHER); context.denoise(PASS_SHADOW_CATCHER_MATTE); diff --git a/intern/cycles/integrator/denoiser_oidn.h b/intern/cycles/integrator/denoiser_oidn.h index dcbd0403f53..91ae38801ea 100644 --- a/intern/cycles/integrator/denoiser_oidn.h +++ b/intern/cycles/integrator/denoiser_oidn.h @@ -36,7 +36,8 @@ class OIDNDenoiser : public Denoiser { virtual void denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, - const int num_samples) override; + const int num_samples, + bool 
allow_inplace_modification) override; protected: virtual uint get_device_type_mask() const override; diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index c04fb4e4838..87e5a6fb53d 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -418,6 +418,7 @@ void PathTrace::denoise(const RenderWork &render_work) RenderBuffers *buffer_to_denoise = nullptr; unique_ptr<RenderBuffers> multi_devoice_buffers; + bool allow_inplace_modification = false; if (path_trace_works_.size() == 1) { buffer_to_denoise = path_trace_works_.front()->get_render_buffers(); @@ -434,10 +435,14 @@ void PathTrace::denoise(const RenderWork &render_work) buffer_to_denoise = multi_devoice_buffers.get(); copy_to_render_buffers(multi_devoice_buffers.get()); + + allow_inplace_modification = true; } - denoiser_->denoise_buffer( - render_state_.effective_big_tile_params, buffer_to_denoise, get_num_samples_in_buffer()); + denoiser_->denoise_buffer(render_state_.effective_big_tile_params, + buffer_to_denoise, + get_num_samples_in_buffer(), + allow_inplace_modification); if (multi_devoice_buffers) { multi_devoice_buffers->copy_from_device(); _______________________________________________ Bf-blender-cvs mailing list [email protected] List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
