Commit: a6eae9213f237650b289751df37e3e4b67360c31 Author: Sergey Sharybin Date: Wed Jun 30 18:19:33 2021 +0200 Branches: cycles-x https://developer.blender.org/rBa6eae9213f237650b289751df37e3e4b67360c31
Cycles X: Multi-device re-balancing This is an initial implementation which seems to give better device utilization here when using two non-matched GPUs, as well as multi-GPU and CPU. The general idea is to balance the amount of work based on the observed performance of the devices, and "re-slice" the big tile. Things which are known to be not final and are left for further development: - The balancing algorithm might need some tweaks to the objective function and weight modification to converge to the ideal balance quicker. - The "re-slicing" might also be optimized memory-wise. - Headless rendering needs to run a few iterations of smaller work to allow multi-device to settle down in the balance. The balancing logic is in its own little file, which simplifies the process of experimenting. Differential Revision: https://developer.blender.org/D11774 =================================================================== M intern/cycles/integrator/path_trace.cpp M intern/cycles/integrator/render_scheduler.cpp M intern/cycles/integrator/work_balancer.cpp =================================================================== diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index 366030df6aa..3d434c5beda 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -516,23 +516,56 @@ void PathTrace::update_display(const RenderWork &render_work) void PathTrace::rebalance(const RenderWork &render_work) { + static const int kLogLevel = 3; + + scoped_timer timer; + + const int num_works = path_trace_works_.size(); + if (!render_work.rebalance) { return; } - if (path_trace_works_.size() == 1) { - VLOG(3) << "Ignoring rebalance work due to single device render."; + if (num_works == 1) { + VLOG(kLogLevel) << "Ignoring rebalance work due to single device render."; return; } - VLOG(3) << "Perform rebalance work."; + if (VLOG_IS_ON(kLogLevel)) { + VLOG(kLogLevel) << "Perform rebalance work."; + VLOG(kLogLevel) << 
"Per-device path tracing time (seconds):"; + for (int i = 0; i < num_works; ++i) { + VLOG(kLogLevel) << path_trace_works_[i]->get_device()->info.description << ": " + << work_balance_infos_[i].time_spent; + } + } + + const bool did_rebalance = work_balance_do_rebalance(work_balance_infos_); - if (!work_balance_do_rebalance(work_balance_infos_)) { - VLOG(3) << "Balance in path trace works did not change."; + if (VLOG_IS_ON(kLogLevel)) { + VLOG(kLogLevel) << "Calculated per-device weights for works:"; + for (int i = 0; i < num_works; ++i) { + LOG(INFO) << path_trace_works_[i]->get_device()->info.description << ": " + << work_balance_infos_[i].weight; + } + } + + if (!did_rebalance) { + VLOG(kLogLevel) << "Balance in path trace works did not change."; return; } - /* TODO(sergey): Update buffer allocation, and copy data across devices as needed. */ + TempCPURenderBuffers big_tile_cpu_buffers(device_); + big_tile_cpu_buffers.buffers->reset(render_state_.effective_big_tile_params); + + copy_to_render_buffers(big_tile_cpu_buffers.buffers.get()); + + render_state_.need_reset_params = true; + update_work_buffer_params_if_needed(render_work); + + copy_from_render_buffers(big_tile_cpu_buffers.buffers.get()); + + VLOG(kLogLevel) << "Rebalance time (seconds): " << timer.get_time(); } void PathTrace::cancel() diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp index 14b1ba69fdd..e9586075a5b 100644 --- a/intern/cycles/integrator/render_scheduler.cpp +++ b/intern/cycles/integrator/render_scheduler.cpp @@ -712,7 +712,7 @@ bool RenderScheduler::work_need_rebalance() { /* This is the minimum time, as the rebalancing can not happen more often than the path trace * work. */ - static const double kRebalanceIntervalInSeconds = 5; + static const double kRebalanceIntervalInSeconds = 1; if (state_.resolution_divider != pixel_size_) { /* Don't rebalance at a non-final resolution divider. 
Some reasons for this: @@ -721,6 +721,10 @@ bool RenderScheduler::work_need_rebalance() return false; } + if (state_.num_rendered_samples == 1) { + return true; + } + return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds; } diff --git a/intern/cycles/integrator/work_balancer.cpp b/intern/cycles/integrator/work_balancer.cpp index b2a69866982..3edb8ba5598 100644 --- a/intern/cycles/integrator/work_balancer.cpp +++ b/intern/cycles/integrator/work_balancer.cpp @@ -16,6 +16,8 @@ #include "integrator/work_balancer.h" +#include "util/util_math.h" + CCL_NAMESPACE_BEGIN void work_balance_do_initial(vector<WorkBalanceInfo> &work_balance_infos) @@ -27,17 +29,108 @@ void work_balance_do_initial(vector<WorkBalanceInfo> &work_balance_infos) return; } + /* There is no statistics available, so start with an equal distribution. */ const double weight = 1.0 / num_infos; for (WorkBalanceInfo &balance_info : work_balance_infos) { balance_info.weight = weight; } } +/* Calculate time which takes for every work to complete a unit of work. + * The result times are normalized so that their sum is 1. */ +static vector<double> calculate_normalized_times_per_unit( + const vector<WorkBalanceInfo> &work_balance_infos) +{ + const int num_infos = work_balance_infos.size(); + + vector<double> times_per_unit; + times_per_unit.reserve(num_infos); + + double total_time_per_unit = 0; + for (const WorkBalanceInfo &work_balance_info : work_balance_infos) { + /* The work did `total_work * weight`, and the time per unit is + * `time_spent / (total_work * weight). The total amount of work is not known here, but it will + * gets cancelled out during normalization anyway. + * + * Note that in some degenerated cases (when amount of work is smaller than amount of workers) + * it is possible that the time and/or weight of the work is 0. */ + const double time_per_unit = work_balance_info.weight != 0 ? 
+ work_balance_info.time_spent / work_balance_info.weight : + 0; + times_per_unit.push_back(time_per_unit); + total_time_per_unit += time_per_unit; + } + + const double total_time_per_unit_inv = 1.0 / total_time_per_unit; + for (double &time_per_unit : times_per_unit) { + time_per_unit *= total_time_per_unit_inv; + } + + return times_per_unit; +} + +/* Calculate weights for the more ideal distribution of work. + * The calculation here is based on an observed performance of every worker: the amount of work + * scheduler is proportional to the performance of the worker. Performance of the worker is an + * inverse of the time-per-unit-work. */ +static vector<double> calculate_normalized_weights( + const vector<WorkBalanceInfo> &work_balance_infos) +{ + const int num_infos = work_balance_infos.size(); + + const vector<double> times_per_unit = calculate_normalized_times_per_unit(work_balance_infos); + + vector<double> weights; + weights.reserve(num_infos); + + double total_weight = 0; + for (double time_per_unit : times_per_unit) { + /* Note that in some degenerated cases (when amount of work is smaller than amount of workers) + * it is possible that the time and/or weight of the work is 0. */ + const double weight = time_per_unit != 0 ? 1.0 / time_per_unit : 0; + total_weight += weight; + weights.push_back(weight); + } + + const double total_weight_inv = 1.0 / total_weight; + for (double &weight : weights) { + weight *= total_weight_inv; + } + + return weights; +} + +static bool apply_new_weights(vector<WorkBalanceInfo> &work_balance_infos, + const vector<double> &new_weights) +{ + const int num_infos = work_balance_infos.size(); + + bool has_big_difference = false; + for (int i = 0; i < num_infos; ++i) { + /* Apparently, there is no `ccl::fabs()`. 
*/ + if (std::fabs(work_balance_infos[i].weight - new_weights[i]) > 0.02) { + has_big_difference = true; + } + } + + if (!has_big_difference) { + return false; + } + + for (int i = 0; i < num_infos; ++i) { + WorkBalanceInfo &info = work_balance_infos[i]; + info.weight = new_weights[i]; + info.time_spent = 0; + } + + return true; +} + bool work_balance_do_rebalance(vector<WorkBalanceInfo> &work_balance_infos) { - /* TODO(sergey): Needs implementation. */ - (void)work_balance_infos; - return false; + const vector<double> new_weights = calculate_normalized_weights(work_balance_infos); + + return apply_new_weights(work_balance_infos, new_weights); } CCL_NAMESPACE_END _______________________________________________ Bf-blender-cvs mailing list [email protected] List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
