This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 36c82b66b9 report minion task metadata last update time as metric 
(#9954)
36c82b66b9 is described below

commit 36c82b66b9845e8f1be7c8f3070656099d10ecfa
Author: Haitao Zhang <[email protected]>
AuthorDate: Wed Dec 21 12:51:54 2022 -0800

    report minion task metadata last update time as metric (#9954)
    
    * report minion task metadata last update time as metric
    
    * fix style problem
    
    * address comments
    
    * fix format
    
    * remove nonnull
---
 .../configs/controller.yml                         |  7 +++
 .../etc/jmx_prometheus_javaagent/configs/pinot.yml |  7 +++
 .../pinot/common/metrics/ControllerGauge.java      |  1 +
 .../common/minion/MinionTaskMetadataUtils.java     | 65 ++++++++++++++++++++++
 .../common/utils/helix/FakePropertyStore.java      | 23 +++++---
 .../common/minion/MinionTaskMetadataUtilsTest.java | 62 +++++++++++++++++++++
 .../core/minion/PinotHelixTaskResourceManager.java | 10 ++++
 .../helix/core/minion/TaskMetricsEmitter.java      |  9 +++
 8 files changed, 177 insertions(+), 7 deletions(-)

diff --git 
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml 
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index 253c43e069..edf96a9f2a 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -112,6 +112,13 @@ rules:
   labels:
     taskType: "$1"
     status: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", 
name=\"pinot.controller.timeMsSinceLastMinionTaskMetadataUpdate.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+  name: "pinot_controller_timeMsSinceLastMinionTaskMetadataUpdate_$4"
+  cache: true
+  labels:
+    table: "$1"
+    tableType: "$2"
+    taskType: "$3"
 - pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", 
name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
   name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
   cache: true
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml 
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
index aa958b6152..6a63001bfc 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
@@ -101,6 +101,13 @@ rules:
   labels:
     taskType: "$1"
     status: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", 
name=\"pinot.controller.timeMsSinceLastMinionTaskMetadataUpdate.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+  name: "pinot_controller_timeMsSinceLastMinionTaskMetadataUpdate_$4"
+  cache: true
+  labels:
+    table: "$1"
+    tableType: "$2"
+    taskType: "$3"
 - pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", 
name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
   name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
   cache: true
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index fdf7c918ac..5df7959d5c 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -52,6 +52,7 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
   OFFLINE_TABLE_COUNT("TableCount", true),
   DISABLED_TABLE_COUNT("TableCount", true),
   PERIODIC_TASK_NUM_TABLES_PROCESSED("PeriodicTaskNumTablesProcessed", true),
+  
TIME_MS_SINCE_LAST_MINION_TASK_METADATA_UPDATE("TimeMsSinceLastMinionTaskMetadataUpdate",
 false),
   NUM_MINION_TASKS_IN_PROGRESS("NumMinionTasksInProgress", true),
   NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true),
   NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true),
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
index e493849a8e..7baf13bfc1 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
@@ -18,13 +18,17 @@
  */
 package org.apache.pinot.common.minion;
 
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import javax.annotation.Nullable;
 import org.apache.helix.AccessOption;
 import org.apache.helix.store.HelixPropertyStore;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.apache.helix.zookeeper.zkclient.exception.ZkException;
 import org.apache.pinot.common.metadata.ZKMetadataProvider;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.utils.StringUtil;
 import org.apache.zookeeper.data.Stat;
 
 
@@ -63,6 +67,67 @@ public final class MinionTaskMetadataUtils {
     return znRecord;
   }
 
+  /**
+   * Gets the last update time (in ms) of all minion task metadata.
+   * @param propertyStore the property store where all minion task metadata is 
stored.
+   * @return a map storing the last update time (in ms) of all minion task 
metadata: (tableNameWithType -> taskType
+   *         -> last update time in ms)
+   */
+  public static Map<String, Map<String, Long>> 
getAllTaskMetadataLastUpdateTimeMs(
+      HelixPropertyStore<ZNRecord> propertyStore) {
+    Map<String, Map<String, Long>> tableTaskLastUpdateTimeMsMap = new 
HashMap<>();
+    String propertyStorePathForMinionTaskMetadataPrefix =
+        ZKMetadataProvider.getPropertyStorePathForMinionTaskMetadataPrefix();
+    // the old and new path may exist at the same time
+    List<String> tableNameWithTypeOrTaskTypes =
+        
propertyStore.getChildNames(propertyStorePathForMinionTaskMetadataPrefix, 
AccessOption.PERSISTENT);
+    if (tableNameWithTypeOrTaskTypes == null || 
tableNameWithTypeOrTaskTypes.isEmpty()) {
+      return tableTaskLastUpdateTimeMsMap;
+    }
+    for (String tableNameWithTypeOrTaskType : tableNameWithTypeOrTaskTypes) {
+      String metadataNodeDirectParentPath =
+          StringUtil.join("/", propertyStorePathForMinionTaskMetadataPrefix, 
tableNameWithTypeOrTaskType);
+      List<String> metadataNodeNames =
+          propertyStore.getChildNames(metadataNodeDirectParentPath, 
AccessOption.PERSISTENT);
+      if (metadataNodeNames == null || metadataNodeNames.isEmpty()) {
+        continue;
+      }
+      // the new path is MINION_TASK_METADATA/${tableNameWthType}/${taskType}
+      // the old path is MINION_TASK_METADATA/${taskType}/${tableNameWthType}
+      // The variable tableNameWithTypeOrTaskType stores the first level child 
name of MINION_TASK_METADATA, that's why
+      // when it ends with OFFLINE or REALTIME, it is a new path.
+      boolean isNewPath =
+          tableNameWithTypeOrTaskType.endsWith(TableType.OFFLINE.toString()) 
|| tableNameWithTypeOrTaskType.endsWith(
+              TableType.REALTIME.toString());
+      for (String metadataNodeName : metadataNodeNames) {
+        String metadataNodePath = StringUtil.join("/", 
metadataNodeDirectParentPath, metadataNodeName);
+        Stat stat = propertyStore.getStat(metadataNodePath, 
AccessOption.PERSISTENT);
+        if (isNewPath) {
+          saveOrUpdateTaskMetadataLastUpdateTime(tableNameWithTypeOrTaskType, 
metadataNodeName, stat.getMtime(),
+              tableTaskLastUpdateTimeMsMap);
+        } else {
+          saveOrUpdateTaskMetadataLastUpdateTime(metadataNodeName, 
tableNameWithTypeOrTaskType, stat.getMtime(),
+              tableTaskLastUpdateTimeMsMap);
+        }
+      }
+    }
+    return tableTaskLastUpdateTimeMsMap;
+  }
+
+  private static void saveOrUpdateTaskMetadataLastUpdateTime(String 
tableNameWithType, String taskType,
+      long newLastUpdateTimeMs, Map<String, Map<String, Long>> 
tableTaskLastUpdateTimeMsMap) {
+    tableTaskLastUpdateTimeMsMap
+        .computeIfAbsent(tableNameWithType, tnt -> new HashMap<>())
+        .compute(taskType, (tt, lastUpdateTimeMs) -> {
+          if (lastUpdateTimeMs == null) {
+            return newLastUpdateTimeMs;
+          } else {
+            // the metadata may be saved in two different places, use the 
larger one
+            return Math.max(lastUpdateTimeMs, newLastUpdateTimeMs);
+          }
+        });
+  }
+
   /**
    * Deletes the minion task metadata ZNRecord for the given minion task and 
tableName, from both the new path
    * MINION_TASK_METADATA/${tableNameWthType}/${taskType} and the old path
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
index daad67f0de..2c5806aab9 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
@@ -31,6 +31,7 @@ import org.apache.zookeeper.data.Stat;
 
 public class FakePropertyStore extends ZkHelixPropertyStore<ZNRecord> {
   private Map<String, ZNRecord> _contents = new HashMap<>();
+  private Map<String, Stat> _statMap = new HashMap<>();
   private IZkDataListener _listener = null;
 
   public FakePropertyStore() {
@@ -42,6 +43,11 @@ public class FakePropertyStore extends 
ZkHelixPropertyStore<ZNRecord> {
     return _contents.get(path);
   }
 
+  @Override
+  public Stat getStat(String path, int options) {
+    return _statMap.get(path);
+  }
+
   @Override
   public List<String> getChildNames(String parentPath, int options) {
     return _contents.keySet().stream()
@@ -62,9 +68,9 @@ public class FakePropertyStore extends 
ZkHelixPropertyStore<ZNRecord> {
   }
 
   @Override
-  public boolean set(String path, ZNRecord stat, int expectedVersion, int 
options) {
+  public boolean set(String path, ZNRecord record, int expectedVersion, int 
options) {
     try {
-      setContents(path, stat);
+      setContentAndStat(path, record);
       return true;
     } catch (Exception e) {
       return false;
@@ -72,9 +78,9 @@ public class FakePropertyStore extends 
ZkHelixPropertyStore<ZNRecord> {
   }
 
   @Override
-  public boolean set(String path, ZNRecord stat, int options) {
+  public boolean set(String path, ZNRecord record, int options) {
     try {
-      setContents(path, stat);
+      setContentAndStat(path, record);
       return true;
     } catch (Exception e) {
       return false;
@@ -88,11 +94,14 @@ public class FakePropertyStore extends 
ZkHelixPropertyStore<ZNRecord> {
     return true;
   }
 
-  public void setContents(String path, ZNRecord contents)
+  public void setContentAndStat(String path, ZNRecord record)
       throws Exception {
-    _contents.put(path, contents);
+    _contents.put(path, record);
+    Stat stat = new Stat();
+    stat.setMtime(System.currentTimeMillis());
+    _statMap.put(path, stat);
     if (_listener != null) {
-      _listener.handleDataChange(path, contents);
+      _listener.handleDataChange(path, record);
     }
   }
 
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
index e2f0b39a3f..5f40c2eb22 100644
--- 
a/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.common.minion;
 
+import java.util.Map;
 import org.apache.helix.AccessOption;
 import org.apache.helix.store.HelixPropertyStore;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
@@ -76,6 +77,67 @@ public class MinionTaskMetadataUtilsTest {
         NEW_TASK_METADATA.toZNRecord());
   }
 
+  @Test
+  public void testGetAllTaskMetadataLastUpdateTimeMs() {
+    // no task metadata exists
+    HelixPropertyStore<ZNRecord> propertyStore = new FakePropertyStore();
+    
assertTrue(MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore).isEmpty());
+
+    // only the old metadata path exists
+    propertyStore = new FakePropertyStore();
+    long tsBeforeSet = System.currentTimeMillis();
+    propertyStore.set(OLD_MINION_METADATA_PATH, 
OLD_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+    long tsAfterSet = System.currentTimeMillis();
+    Map<String, Map<String, Long>> allTaskMetadataLastUpdateTimeMs =
+        
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+    assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+    Map<String, Long> taskTypeLastUpdateMsMap = 
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+    assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+    long lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+    assertTrue(lastUpdateTimeMs >= tsBeforeSet && lastUpdateTimeMs <= 
tsAfterSet);
+
+    // only the new metadata path exists
+    propertyStore = new FakePropertyStore();
+    tsBeforeSet = System.currentTimeMillis();
+    propertyStore.set(NEW_MINION_METADATA_PATH, 
NEW_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+    tsAfterSet = System.currentTimeMillis();
+    allTaskMetadataLastUpdateTimeMs =
+        
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+    assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+    taskTypeLastUpdateMsMap = 
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+    assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+    lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+    assertTrue(lastUpdateTimeMs >= tsBeforeSet && lastUpdateTimeMs <= 
tsAfterSet);
+
+    // if two metadata paths exist at the same time, the newly updated one 
will be used.
+    // the new metadata path is updated later
+    propertyStore = new FakePropertyStore();
+    propertyStore.set(OLD_MINION_METADATA_PATH, 
OLD_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+    long tsAfterOldPathSet = System.currentTimeMillis();
+    propertyStore.set(NEW_MINION_METADATA_PATH, 
NEW_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+    long tsAfterNewPathSet = System.currentTimeMillis();
+    allTaskMetadataLastUpdateTimeMs =
+        
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+    assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+    taskTypeLastUpdateMsMap = 
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+    assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+    lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+    assertTrue(lastUpdateTimeMs >= tsAfterOldPathSet && lastUpdateTimeMs <= 
tsAfterNewPathSet);
+    // the old metadata path is updated later
+    propertyStore = new FakePropertyStore();
+    propertyStore.set(NEW_MINION_METADATA_PATH, 
NEW_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+    tsAfterNewPathSet = System.currentTimeMillis();
+    propertyStore.set(OLD_MINION_METADATA_PATH, 
OLD_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+    tsAfterOldPathSet = System.currentTimeMillis();
+    allTaskMetadataLastUpdateTimeMs =
+        
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+    assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+    taskTypeLastUpdateMsMap = 
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+    assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+    lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+    assertTrue(lastUpdateTimeMs >= tsAfterNewPathSet && lastUpdateTimeMs <= 
tsAfterOldPathSet);
+  }
+
   @Test
   public void testDeleteTaskMetadata() {
     // no error
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
index 4a2c0bad3c..0aee197832 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
@@ -836,6 +836,16 @@ public class PinotHelixTaskResourceManager {
     MinionTaskMetadataUtils.deleteTaskMetadata(propertyStore, taskType, 
tableNameWithType);
   }
 
+  /**
+   * Gets the last update time (in ms) of all minion task metadata.
+   * @return a map storing the last update time (in ms) of all minion task 
metadata: (tableNameWithType -> taskType
+   *         -> last update time in ms)
+   */
+  public Map<String, Map<String, Long>> getTaskMetadataLastUpdateTimeMs() {
+    ZkHelixPropertyStore<ZNRecord> propertyStore = 
_helixResourceManager.getPropertyStore();
+    return 
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+  }
+
   @JsonPropertyOrder({"taskState", "subtaskCount", "startTime", 
"executionStartTime", "finishTime", "subtaskInfos"})
   @JsonInclude(JsonInclude.Include.NON_NULL)
   public static class TaskDebugInfo {
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
index f589571bda..c7234774a2 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.controller.helix.core.minion;
 
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 import org.apache.pinot.common.metrics.ControllerGauge;
@@ -66,6 +67,14 @@ public class TaskMetricsEmitter extends BasePeriodicTask {
       return;
     }
 
+    Map<String, Map<String, Long>> taskMetadataLastUpdateTime =
+        _helixTaskResourceManager.getTaskMetadataLastUpdateTimeMs();
+    taskMetadataLastUpdateTime.forEach((tableNameWithType, 
taskTypeLastUpdateTime) ->
+        taskTypeLastUpdateTime.forEach((taskType, lastUpdateTimeMs) ->
+            _controllerMetrics.addOrUpdateGauge(
+                
ControllerGauge.TIME_MS_SINCE_LAST_MINION_TASK_METADATA_UPDATE.getGaugeName() + 
"."
+                    + tableNameWithType + "." + taskType, () -> 
System.currentTimeMillis() - lastUpdateTimeMs)));
+
     // The call to get task types can take time if there are a lot of tasks.
     // Potential optimization is to call it every (say) 30m if we detect a 
barrage of
     // zk requests.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to