This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit b8c8ac4da087ecd8793cc6b8b7fcef2444983005
Author: Andrew Wong <[email protected]>
AuthorDate: Fri Jan 31 18:15:53 2020 -0800

    util: maintenance manager comment cleanup
    
    I noticed some comments were out of date. There are no functional
    changes in this patch.
    
    Change-Id: I3e668a2982bc1bfb39dec47f63ec7e137113730a
    Reviewed-on: http://gerrit.cloudera.org:8080/15148
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <[email protected]>
---
 src/kudu/util/maintenance_manager.cc | 55 ++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/src/kudu/util/maintenance_manager.cc 
b/src/kudu/util/maintenance_manager.cc
index 61a6e52..3ace96e 100644
--- a/src/kudu/util/maintenance_manager.cc
+++ b/src/kudu/util/maintenance_manager.cc
@@ -63,17 +63,17 @@ DEFINE_int32(maintenance_manager_num_threads, 1,
 TAG_FLAG(maintenance_manager_num_threads, stable);
 
 DEFINE_int32(maintenance_manager_polling_interval_ms, 250,
-       "Polling interval for the maintenance manager scheduler, "
-       "in milliseconds.");
+             "Polling interval for the maintenance manager scheduler, "
+             "in milliseconds.");
 TAG_FLAG(maintenance_manager_polling_interval_ms, hidden);
 
 DEFINE_int32(maintenance_manager_history_size, 8,
-       "Number of completed operations the manager keeps track of.");
+             "Number of completed operations the manager keeps track of.");
 TAG_FLAG(maintenance_manager_history_size, hidden);
 
 DEFINE_bool(enable_maintenance_manager, true,
-       "Enable the maintenance manager, which runs flush, compaction, and "
-       "garbage collection operations on tablets.");
+            "Enable the maintenance manager, which runs flush, compaction, "
+            "and garbage collection operations on tablets.");
 TAG_FLAG(enable_maintenance_manager, unsafe);
 
 DEFINE_int64(log_target_replay_size_mb, 1024,
@@ -84,7 +84,7 @@ DEFINE_int64(log_target_replay_size_mb, 1024,
 TAG_FLAG(log_target_replay_size_mb, experimental);
 
 DEFINE_int64(data_gc_min_size_mb, 0,
-             "The (exclusive) minimum number of megabytes of ancient data on "
+             "The (exclusive) minimum number of mebibytes of ancient data on "
              "disk, per tablet, needed to prioritize deletion of that data.");
 TAG_FLAG(data_gc_min_size_mb, experimental);
 
@@ -331,26 +331,24 @@ bool 
MaintenanceManager::FindAndLaunchOp(std::unique_lock<Mutex>* guard) {
   return true;
 }
 
-// Finding the best operation goes through four filters:
-// - If there's an Op that we can run quickly that frees log retention, we run 
it.
-// - If we've hit the overall process memory limit (note: this includes memory 
that the Ops cannot
-//   free), we run the Op with the highest RAM usage.
-// - If there are Ops that are retaining logs past our target replay size, we 
run the one that has
-//   the highest retention (and if many qualify, then we run the one that also 
frees up the
-//   most RAM).
-// - Finally, if there's nothing else that we really need to do, we run the Op 
that will improve
-//   performance the most.
+// Finding the best operation goes through some filters:
+// - If there's an Op that we can run quickly that frees log retention, run it
+//   (e.g. GCing WAL segments).
+// - If we've hit the overall process memory limit (note: this includes memory
+//   that the Ops cannot free), we run the Op that retains the most WAL
+//   segments, which will free memory (e.g. MRS or DMS flush).
+// - If there Ops that are retaining logs past our target replay size, we
+//   run the one that has the highest retention, and if many qualify, then we
+//   run the one that also frees up the most RAM (e.g. MRS or DMS flush).
+// - If there are Ops that we can run that free disk space, run whichever frees
+//   the most space (e.g. GCing ancient deltas).
+// - Finally, if there's nothing else that we really need to do, we run the Op
+//   that will improve performance the most.
 //
-// The reason it's done this way is that we want to prioritize limiting the 
amount of resources we
-// hold on to. Low IO Ops go first since we can quickly run them, then we can 
look at memory usage.
-// Reversing those can starve the low IO Ops when the system is under intense 
memory pressure.
-//
-// In the third priority we're at a point where nothing's urgent and there's 
nothing we can run
-// quickly.
-// TODO(wdberkeley) We currently optimize for freeing log retention but we 
could consider having
-// some sort of sliding priority between log retention and RAM usage. For 
example, is an Op that
-// frees 128MB of log retention and 12MB of RAM always better than an op that 
frees 12MB of log
-// retention and 128MB of RAM? Maybe a more holistic approach would be better.
+// In general, we want to prioritize limiting the amount of expensive resources
+// we hold onto. Low IO ops that free WAL disk space are preferred, followed by
+// ops that free memory, then ops that free data disk space, then ops that
+// improve performance.
 pair<MaintenanceOp*, string> MaintenanceManager::FindBestOp() {
   TRACE_EVENT0("maintenance", "MaintenanceManager::FindBestOp");
 
@@ -391,8 +389,8 @@ pair<MaintenanceOp*, string> 
MaintenanceManager::FindBestOp() {
                         op->name(), logs_retained_bytes);
     }
 
-    // We prioritize ops that can free more logs, but when it's the same we 
pick
-    // the one that also frees up the most memory.
+    // We prioritize ops that can free more logs, but when it's the same we
+    // pick the one that also frees up the most memory.
     const auto ram_anchored = stats.ram_anchored();
     if (std::make_pair(logs_retained_bytes, ram_anchored) >
         std::make_pair(most_logs_retained_bytes,
@@ -455,7 +453,8 @@ pair<MaintenanceOp*, string> 
MaintenanceManager::FindBestOp() {
     return {most_logs_retained_bytes_ram_anchored_op, std::move(note)};
   }
 
-  // Look at ops that we can run quickly that free up data on disk.
+  // Look at ops that free up data on disk. To avoid starvation of
+  // performance-improving ops, we might skip freeing disk space.
   if (most_data_retained_bytes_op &&
       most_data_retained_bytes > FLAGS_data_gc_min_size_mb * 1024 * 1024) {
     if (!best_perf_improvement_op || best_perf_improvement <= 0 ||

Reply via email to