Commit: 8988fcc49436226c247a5a99b3486906d1738a64 Author: Sergey Sharybin Date: Fri Jul 9 17:46:10 2021 +0200 Branches: cycles-x https://developer.blender.org/rB8988fcc49436226c247a5a99b3486906d1738a64
Cycles X: Reduce memory usage of OptiX denoiser and CPU render Allow modifying a temporary buffer created in the DeviceDenoiser. Moved denoised pass copy to a more accessible function. It is not very flexible yet, and performance might be improved, but it fits current usage and is only used from much more compute-heavy steps of the render. Differential Revision: https://developer.blender.org/D11874 =================================================================== M intern/cycles/integrator/denoiser_device.cpp M intern/cycles/integrator/path_trace_work.cpp M intern/cycles/render/buffers.cpp M intern/cycles/render/buffers.h =================================================================== diff --git a/intern/cycles/integrator/denoiser_device.cpp b/intern/cycles/integrator/denoiser_device.cpp index 05cc400f02a..d843700c323 100644 --- a/intern/cycles/integrator/denoiser_device.cpp +++ b/intern/cycles/integrator/denoiser_device.cpp @@ -86,17 +86,17 @@ void DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params, queue->copy_to_device(local_render_buffers.buffer); task.render_buffers = &local_render_buffers; + task.allow_inplace_modification = true; } denoiser_device->denoise_buffer(task); if (local_buffer_used) { - /* TODO(sergey): Only copy denoised passes. This will also allow to reduce memory usage by - * allowing in-place modification of the temporary render buffer. 
*/ local_render_buffers.copy_from_device(); - memcpy(render_buffers->buffer.data(), - local_render_buffers.buffer.data(), - sizeof(float) * local_render_buffers.buffer.size()); + + render_buffers_host_copy_denoised( + render_buffers, buffer_params, &local_render_buffers, local_render_buffers.params); + render_buffers->copy_to_device(); } } diff --git a/intern/cycles/integrator/path_trace_work.cpp b/intern/cycles/integrator/path_trace_work.cpp index b569b7b6100..fe53b470fa6 100644 --- a/intern/cycles/integrator/path_trace_work.cpp +++ b/intern/cycles/integrator/path_trace_work.cpp @@ -115,46 +115,11 @@ void PathTraceWork::copy_from_render_buffers(const RenderBuffers *render_buffers void PathTraceWork::copy_from_denoised_render_buffers(const RenderBuffers *render_buffers) { const int64_t width = effective_buffer_params_.width; - const int64_t height = effective_buffer_params_.height; - const int64_t pass_stride = effective_buffer_params_.pass_stride; - const int64_t row_stride = width * pass_stride; - const int64_t num_pixels = width * height; - const int64_t offset_y = effective_buffer_params_.full_y - effective_big_tile_params_.full_y; - const int64_t offset_in_floats = offset_y * row_stride; + const int64_t offset = offset_y * width; - const float *src = render_buffers->buffer.data() + offset_in_floats; - float *dst = buffers_->buffer.data(); - - /* Gather pass offsets which are to be copied. */ - /* TODO(sergey): Somehow de-duplicate logic with OptiX and OpenImage denoisers, so that we don't - * have duplicated list of passes in multiple places. 
*/ - const PassType pass_types[] = { - PASS_COMBINED, PASS_SHADOW_CATCHER, PASS_SHADOW_CATCHER_MATTE, PASS_NONE}; - int pass_offsets[PASS_NUM]; - int num_passes = 0; - for (int i = 0; i < PASS_NUM; ++i) { - if (pass_types[i] == PASS_NONE) { - break; - } - pass_offsets[i] = render_buffers->params.get_pass_offset(pass_types[i], PassMode::DENOISED); - ++num_passes; - } - - for (int i = 0; i < num_pixels; ++i, src += pass_stride, dst += pass_stride) { - for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) { - const int pass_offset = pass_offsets[pass_offset_idx]; - if (pass_offset == PASS_UNUSED) { - continue; - } - - /* TODO(sergey): Support non-RGBA passes. */ - dst[pass_offset + 0] = src[pass_offset + 0]; - dst[pass_offset + 1] = src[pass_offset + 1]; - dst[pass_offset + 2] = src[pass_offset + 2]; - dst[pass_offset + 3] = src[pass_offset + 3]; - } - } + render_buffers_host_copy_denoised( + buffers_.get(), effective_buffer_params_, render_buffers, effective_buffer_params_, offset); copy_render_buffers_to_device(); } diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index 5ce31e4e308..31fe7378924 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -70,11 +70,11 @@ void BufferParams::update_passes(vector<Pass> &passes) for (const Pass &pass : passes) { const int index = pass_to_index(pass); - if (pass_offset_[index] == PASS_UNUSED) { - pass_offset_[index] = pass_stride; - } - if (pass.is_written()) { + if (pass_offset_[index] == PASS_UNUSED) { + pass_offset_[index] = pass_stride; + } + pass_stride += pass.get_info().num_components; } } @@ -183,4 +183,71 @@ void RenderBuffers::copy_to_device() buffer.copy_to_device(); } +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset) +{ + DCHECK_EQ(dst_params.width, src_params.width); + /* TODO(sergey): More 
sanity checks to avoid buffer overrun. */ + + /* Create a map of pass ofsets to be copied. + * Assume offsets are different to allow copying passes between buffers with different set of + * passes. */ + + struct { + int dst_offset; + int src_offset; + } pass_offsets[PASS_NUM]; + + int num_passes = 0; + + for (int i = 0; i < PASS_NUM; ++i) { + const PassType pass_type = static_cast<PassType>(i); + + const int dst_pass_offset = dst_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (dst_pass_offset == PASS_UNUSED) { + continue; + } + + const int src_pass_offset = src_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (src_pass_offset == PASS_UNUSED) { + continue; + } + + pass_offsets[num_passes].dst_offset = dst_pass_offset; + pass_offsets[num_passes].src_offset = src_pass_offset; + ++num_passes; + } + + /* Copy passes. */ + /* TODO(sergey): Make it more reusable, allowing implement copy of noisy passes. */ + + const int64_t dst_width = dst_params.width; + const int64_t dst_height = dst_params.height; + const int64_t dst_pass_stride = dst_params.pass_stride; + const int64_t dst_num_pixels = dst_width * dst_height; + + const int64_t src_pass_stride = src_params.pass_stride; + const int64_t src_offset_in_floats = src_offset * src_pass_stride; + + const float *src_pixel = src->buffer.data() + src_offset_in_floats; + float *dst_pixel = dst->buffer.data(); + + for (int i = 0; i < dst_num_pixels; + ++i, src_pixel += src_pass_stride, dst_pixel += dst_pass_stride) { + for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) { + const int dst_pass_offset = pass_offsets[pass_offset_idx].dst_offset; + const int src_pass_offset = pass_offsets[pass_offset_idx].src_offset; + + /* TODO(sergey): Support non-RGBA passes. 
*/ + dst_pixel[dst_pass_offset + 0] = src_pixel[src_pass_offset + 0]; + dst_pixel[dst_pass_offset + 1] = src_pixel[src_pass_offset + 1]; + dst_pixel[dst_pass_offset + 2] = src_pixel[src_pass_offset + 2]; + dst_pixel[dst_pass_offset + 3] = src_pixel[src_pass_offset + 3]; + } + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 93ea5393554..8077a30b87a 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -98,6 +98,21 @@ class RenderBuffers { void copy_to_device(); }; +/* Copy denoised passes form source to destination. + * + * Buffer parameters are provided explicitly, allowing to copy pixelks between render buffers which + * content corresponds to a render result at a non-unit resolution divider. + * + * `src_offset` allows to offset source pixel index which is used when a fraction of the source + * buffer is to be copied. + * + * Copy happens of the number of pixels in the destination. */ +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset = 0); + /* Render Tile * Rendering task on a buffer */ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
