This is an automated email from the ASF dual-hosted git repository.
snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 36c82b66b9 report minion task metadata last update time as metric
(#9954)
36c82b66b9 is described below
commit 36c82b66b9845e8f1be7c8f3070656099d10ecfa
Author: Haitao Zhang <[email protected]>
AuthorDate: Wed Dec 21 12:51:54 2022 -0800
report minion task metadata last update time as metric (#9954)
* report minion task metadata last update time as metric
* fix style problem
* address comments
* fix format
* remove nonnull
---
.../configs/controller.yml | 7 +++
.../etc/jmx_prometheus_javaagent/configs/pinot.yml | 7 +++
.../pinot/common/metrics/ControllerGauge.java | 1 +
.../common/minion/MinionTaskMetadataUtils.java | 65 ++++++++++++++++++++++
.../common/utils/helix/FakePropertyStore.java | 23 +++++---
.../common/minion/MinionTaskMetadataUtilsTest.java | 62 +++++++++++++++++++++
.../core/minion/PinotHelixTaskResourceManager.java | 10 ++++
.../helix/core/minion/TaskMetricsEmitter.java | 9 +++
8 files changed, 177 insertions(+), 7 deletions(-)
diff --git
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index 253c43e069..edf96a9f2a 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -112,6 +112,13 @@ rules:
labels:
taskType: "$1"
status: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\",
name=\"pinot.controller.timeMsSinceLastMinionTaskMetadataUpdate.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+ name: "pinot_controller_timeMsSinceLastMinionTaskMetadataUpdate_$4"
+ cache: true
+ labels:
+ table: "$1"
+ tableType: "$2"
+ taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\",
name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
cache: true
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
index aa958b6152..6a63001bfc 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
@@ -101,6 +101,13 @@ rules:
labels:
taskType: "$1"
status: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\",
name=\"pinot.controller.timeMsSinceLastMinionTaskMetadataUpdate.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+ name: "pinot_controller_timeMsSinceLastMinionTaskMetadataUpdate_$4"
+ cache: true
+ labels:
+ table: "$1"
+ tableType: "$2"
+ taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\",
name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
cache: true
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index fdf7c918ac..5df7959d5c 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -52,6 +52,7 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
OFFLINE_TABLE_COUNT("TableCount", true),
DISABLED_TABLE_COUNT("TableCount", true),
PERIODIC_TASK_NUM_TABLES_PROCESSED("PeriodicTaskNumTablesProcessed", true),
+
TIME_MS_SINCE_LAST_MINION_TASK_METADATA_UPDATE("TimeMsSinceLastMinionTaskMetadataUpdate",
false),
NUM_MINION_TASKS_IN_PROGRESS("NumMinionTasksInProgress", true),
NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true),
NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true),
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
b/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
index e493849a8e..7baf13bfc1 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/minion/MinionTaskMetadataUtils.java
@@ -18,13 +18,17 @@
*/
package org.apache.pinot.common.minion;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import javax.annotation.Nullable;
import org.apache.helix.AccessOption;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
import org.apache.helix.zookeeper.zkclient.exception.ZkException;
import org.apache.pinot.common.metadata.ZKMetadataProvider;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.utils.StringUtil;
import org.apache.zookeeper.data.Stat;
@@ -63,6 +67,67 @@ public final class MinionTaskMetadataUtils {
return znRecord;
}
+ /**
+ * Gets the last update time (in ms) of all minion task metadata.
+ * @param propertyStore the property store where all minion task metadata is
stored.
+ * @return a map storing the last update time (in ms) of all minion task
metadata: (tableNameWithType -> taskType
+ * -> last update time in ms)
+ */
+ public static Map<String, Map<String, Long>>
getAllTaskMetadataLastUpdateTimeMs(
+ HelixPropertyStore<ZNRecord> propertyStore) {
+ Map<String, Map<String, Long>> tableTaskLastUpdateTimeMsMap = new
HashMap<>();
+ String propertyStorePathForMinionTaskMetadataPrefix =
+ ZKMetadataProvider.getPropertyStorePathForMinionTaskMetadataPrefix();
+ // the old and new path may exist at the same time
+ List<String> tableNameWithTypeOrTaskTypes =
+
propertyStore.getChildNames(propertyStorePathForMinionTaskMetadataPrefix,
AccessOption.PERSISTENT);
+ if (tableNameWithTypeOrTaskTypes == null ||
tableNameWithTypeOrTaskTypes.isEmpty()) {
+ return tableTaskLastUpdateTimeMsMap;
+ }
+ for (String tableNameWithTypeOrTaskType : tableNameWithTypeOrTaskTypes) {
+ String metadataNodeDirectParentPath =
+ StringUtil.join("/", propertyStorePathForMinionTaskMetadataPrefix,
tableNameWithTypeOrTaskType);
+ List<String> metadataNodeNames =
+ propertyStore.getChildNames(metadataNodeDirectParentPath,
AccessOption.PERSISTENT);
+ if (metadataNodeNames == null || metadataNodeNames.isEmpty()) {
+ continue;
+ }
+ // the new path is MINION_TASK_METADATA/${tableNameWthType}/${taskType}
+ // the old path is MINION_TASK_METADATA/${taskType}/${tableNameWthType}
+ // The variable tableNameWithTypeOrTaskType stores the first level child
name of MINION_TASK_METADATA, that's why
+ // when it ends with OFFLINE or REALTIME, it is a new path.
+ boolean isNewPath =
+ tableNameWithTypeOrTaskType.endsWith(TableType.OFFLINE.toString())
|| tableNameWithTypeOrTaskType.endsWith(
+ TableType.REALTIME.toString());
+ for (String metadataNodeName : metadataNodeNames) {
+ String metadataNodePath = StringUtil.join("/",
metadataNodeDirectParentPath, metadataNodeName);
+ Stat stat = propertyStore.getStat(metadataNodePath,
AccessOption.PERSISTENT);
+ if (isNewPath) {
+ saveOrUpdateTaskMetadataLastUpdateTime(tableNameWithTypeOrTaskType,
metadataNodeName, stat.getMtime(),
+ tableTaskLastUpdateTimeMsMap);
+ } else {
+ saveOrUpdateTaskMetadataLastUpdateTime(metadataNodeName,
tableNameWithTypeOrTaskType, stat.getMtime(),
+ tableTaskLastUpdateTimeMsMap);
+ }
+ }
+ }
+ return tableTaskLastUpdateTimeMsMap;
+ }
+
+ private static void saveOrUpdateTaskMetadataLastUpdateTime(String
tableNameWithType, String taskType,
+ long newLastUpdateTimeMs, Map<String, Map<String, Long>>
tableTaskLastUpdateTimeMsMap) {
+ tableTaskLastUpdateTimeMsMap
+ .computeIfAbsent(tableNameWithType, tnt -> new HashMap<>())
+ .compute(taskType, (tt, lastUpdateTimeMs) -> {
+ if (lastUpdateTimeMs == null) {
+ return newLastUpdateTimeMs;
+ } else {
+ // the metadata may be saved in two different places, use the
larger one
+ return Math.max(lastUpdateTimeMs, newLastUpdateTimeMs);
+ }
+ });
+ }
+
/**
* Deletes the minion task metadata ZNRecord for the given minion task and
tableName, from both the new path
* MINION_TASK_METADATA/${tableNameWthType}/${taskType} and the old path
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
b/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
index daad67f0de..2c5806aab9 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/utils/helix/FakePropertyStore.java
@@ -31,6 +31,7 @@ import org.apache.zookeeper.data.Stat;
public class FakePropertyStore extends ZkHelixPropertyStore<ZNRecord> {
private Map<String, ZNRecord> _contents = new HashMap<>();
+ private Map<String, Stat> _statMap = new HashMap<>();
private IZkDataListener _listener = null;
public FakePropertyStore() {
@@ -42,6 +43,11 @@ public class FakePropertyStore extends
ZkHelixPropertyStore<ZNRecord> {
return _contents.get(path);
}
+ @Override
+ public Stat getStat(String path, int options) {
+ return _statMap.get(path);
+ }
+
@Override
public List<String> getChildNames(String parentPath, int options) {
return _contents.keySet().stream()
@@ -62,9 +68,9 @@ public class FakePropertyStore extends
ZkHelixPropertyStore<ZNRecord> {
}
@Override
- public boolean set(String path, ZNRecord stat, int expectedVersion, int
options) {
+ public boolean set(String path, ZNRecord record, int expectedVersion, int
options) {
try {
- setContents(path, stat);
+ setContentAndStat(path, record);
return true;
} catch (Exception e) {
return false;
@@ -72,9 +78,9 @@ public class FakePropertyStore extends
ZkHelixPropertyStore<ZNRecord> {
}
@Override
- public boolean set(String path, ZNRecord stat, int options) {
+ public boolean set(String path, ZNRecord record, int options) {
try {
- setContents(path, stat);
+ setContentAndStat(path, record);
return true;
} catch (Exception e) {
return false;
@@ -88,11 +94,14 @@ public class FakePropertyStore extends
ZkHelixPropertyStore<ZNRecord> {
return true;
}
- public void setContents(String path, ZNRecord contents)
+ public void setContentAndStat(String path, ZNRecord record)
throws Exception {
- _contents.put(path, contents);
+ _contents.put(path, record);
+ Stat stat = new Stat();
+ stat.setMtime(System.currentTimeMillis());
+ _statMap.put(path, stat);
if (_listener != null) {
- _listener.handleDataChange(path, contents);
+ _listener.handleDataChange(path, record);
}
}
diff --git
a/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
b/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
index e2f0b39a3f..5f40c2eb22 100644
---
a/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
+++
b/pinot-common/src/test/java/org/apache/pinot/common/minion/MinionTaskMetadataUtilsTest.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.common.minion;
+import java.util.Map;
import org.apache.helix.AccessOption;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
@@ -76,6 +77,67 @@ public class MinionTaskMetadataUtilsTest {
NEW_TASK_METADATA.toZNRecord());
}
+ @Test
+ public void testGetAllTaskMetadataLastUpdateTimeMs() {
+ // no task metadata exists
+ HelixPropertyStore<ZNRecord> propertyStore = new FakePropertyStore();
+
assertTrue(MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore).isEmpty());
+
+ // only the old metadata path exists
+ propertyStore = new FakePropertyStore();
+ long tsBeforeSet = System.currentTimeMillis();
+ propertyStore.set(OLD_MINION_METADATA_PATH,
OLD_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+ long tsAfterSet = System.currentTimeMillis();
+ Map<String, Map<String, Long>> allTaskMetadataLastUpdateTimeMs =
+
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+ assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+ Map<String, Long> taskTypeLastUpdateMsMap =
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+ assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+ long lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+ assertTrue(lastUpdateTimeMs >= tsBeforeSet && lastUpdateTimeMs <=
tsAfterSet);
+
+ // only the new metadata path exists
+ propertyStore = new FakePropertyStore();
+ tsBeforeSet = System.currentTimeMillis();
+ propertyStore.set(NEW_MINION_METADATA_PATH,
NEW_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+ tsAfterSet = System.currentTimeMillis();
+ allTaskMetadataLastUpdateTimeMs =
+
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+ assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+ taskTypeLastUpdateMsMap =
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+ assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+ lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+ assertTrue(lastUpdateTimeMs >= tsBeforeSet && lastUpdateTimeMs <=
tsAfterSet);
+
+ // if two metadata paths exist at the same time, the newly updated one
will be used.
+ // the new metadata path is updated later
+ propertyStore = new FakePropertyStore();
+ propertyStore.set(OLD_MINION_METADATA_PATH,
OLD_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+ long tsAfterOldPathSet = System.currentTimeMillis();
+ propertyStore.set(NEW_MINION_METADATA_PATH,
NEW_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+ long tsAfterNewPathSet = System.currentTimeMillis();
+ allTaskMetadataLastUpdateTimeMs =
+
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+ assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+ taskTypeLastUpdateMsMap =
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+ assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+ lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+ assertTrue(lastUpdateTimeMs >= tsAfterOldPathSet && lastUpdateTimeMs <=
tsAfterNewPathSet);
+ // the old metadata path is updated later
+ propertyStore = new FakePropertyStore();
+ propertyStore.set(NEW_MINION_METADATA_PATH,
NEW_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+ tsAfterNewPathSet = System.currentTimeMillis();
+ propertyStore.set(OLD_MINION_METADATA_PATH,
OLD_TASK_METADATA.toZNRecord(), EXPECTED_VERSION, ACCESS_OPTION);
+ tsAfterOldPathSet = System.currentTimeMillis();
+ allTaskMetadataLastUpdateTimeMs =
+
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+ assertEquals(allTaskMetadataLastUpdateTimeMs.size(), 1);
+ taskTypeLastUpdateMsMap =
allTaskMetadataLastUpdateTimeMs.get(TABLE_NAME_WITH_TYPE);
+ assertEquals(taskTypeLastUpdateMsMap.size(), 1);
+ lastUpdateTimeMs = taskTypeLastUpdateMsMap.get(TASK_TYPE);
+ assertTrue(lastUpdateTimeMs >= tsAfterNewPathSet && lastUpdateTimeMs <=
tsAfterOldPathSet);
+ }
+
@Test
public void testDeleteTaskMetadata() {
// no error
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
index 4a2c0bad3c..0aee197832 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotHelixTaskResourceManager.java
@@ -836,6 +836,16 @@ public class PinotHelixTaskResourceManager {
MinionTaskMetadataUtils.deleteTaskMetadata(propertyStore, taskType,
tableNameWithType);
}
+ /**
+ * Gets the last update time (in ms) of all minion task metadata.
+ * @return a map storing the last update time (in ms) of all minion task
metadata: (tableNameWithType -> taskType
+ * -> last update time in ms)
+ */
+ public Map<String, Map<String, Long>> getTaskMetadataLastUpdateTimeMs() {
+ ZkHelixPropertyStore<ZNRecord> propertyStore =
_helixResourceManager.getPropertyStore();
+ return
MinionTaskMetadataUtils.getAllTaskMetadataLastUpdateTimeMs(propertyStore);
+ }
+
@JsonPropertyOrder({"taskState", "subtaskCount", "startTime",
"executionStartTime", "finishTime", "subtaskInfos"})
@JsonInclude(JsonInclude.Include.NON_NULL)
public static class TaskDebugInfo {
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
index f589571bda..c7234774a2 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
@@ -19,6 +19,7 @@
package org.apache.pinot.controller.helix.core.minion;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.pinot.common.metrics.ControllerGauge;
@@ -66,6 +67,14 @@ public class TaskMetricsEmitter extends BasePeriodicTask {
return;
}
+ Map<String, Map<String, Long>> taskMetadataLastUpdateTime =
+ _helixTaskResourceManager.getTaskMetadataLastUpdateTimeMs();
+ taskMetadataLastUpdateTime.forEach((tableNameWithType,
taskTypeLastUpdateTime) ->
+ taskTypeLastUpdateTime.forEach((taskType, lastUpdateTimeMs) ->
+ _controllerMetrics.addOrUpdateGauge(
+
ControllerGauge.TIME_MS_SINCE_LAST_MINION_TASK_METADATA_UPDATE.getGaugeName() +
"."
+ + tableNameWithType + "." + taskType, () ->
System.currentTimeMillis() - lastUpdateTimeMs)));
+
// The call to get task types can take time if there are a lot of tasks.
// Potential optimization is to call it every (say) 30m if we detect a
barrage of
// zk requests.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]