This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5ea6e4bc035 [enhance](mtmv)Optimize the speed of obtaining the last
update time of Hive (#40169)
5ea6e4bc035 is described below
commit 5ea6e4bc03575444688dc4672d099354ba6b17e6
Author: zhangdong <[email protected]>
AuthorDate: Tue Sep 3 11:43:49 2024 +0800
[enhance](mtmv)Optimize the speed of obtaining the last update time of Hive
(#40169)
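Previously, to obtain the last update time of a Hive table, it was
necessary to obtain the last update time of all partitions under the
table, which required generating a large map.
This change instead iterates over the cached partition ids and looks up
each HivePartition directly in the partition cache to read its last
modified time.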
---
.../doris/datasource/hive/HMSExternalTable.java | 79 ++++++++++++----------
.../doris/datasource/hive/HiveMetaStoreCache.java | 4 ++
2 files changed, 46 insertions(+), 37 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index a9f2da13b40..8217f1c3a36 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -53,6 +53,7 @@ import com.google.common.collect.BiMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
+import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -741,18 +742,17 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
return res;
}
- private HiveMetaStoreCache.HivePartitionValues getHivePartitionValues() {
- HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
- .getMetaStoreCache((HMSExternalCatalog) getCatalog());
- return cache.getPartitionValues(
- getDbName(), getName(), getPartitionColumnTypes());
- }
-
@Override
public MTMVSnapshotIf getPartitionSnapshot(String partitionName,
MTMVRefreshContext context)
throws AnalysisException {
- long partitionLastModifyTime =
getPartitionLastModifyTime(partitionName);
- return new MTMVTimestampSnapshot(partitionLastModifyTime);
+ HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
+ .getMetaStoreCache((HMSExternalCatalog) getCatalog());
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
cache.getPartitionValues(
+ getDbName(), getName(), getPartitionColumnTypes());
+ Long partitionId =
getPartitionIdByNameOrAnalysisException(partitionName, hivePartitionValues);
+ HivePartition hivePartition =
getHivePartitionByIdOrAnalysisException(partitionId,
+ hivePartitionValues, cache);
+ return new MTMVTimestampSnapshot(hivePartition.getLastModifiedTime());
}
@Override
@@ -760,45 +760,50 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
if (getPartitionType() == PartitionType.UNPARTITIONED) {
return new MTMVMaxTimestampSnapshot(getName(), getLastDdlTime());
}
- String partitionName = "";
+ Long maxPartitionId = 0L;
long maxVersionTime = 0L;
long visibleVersionTime;
- for (Entry<String, PartitionItem> entry :
getAndCopyPartitionItems().entrySet()) {
- visibleVersionTime = getPartitionLastModifyTime(entry.getKey());
+ HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
+ .getMetaStoreCache((HMSExternalCatalog) getCatalog());
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
cache.getPartitionValues(
+ getDbName(), getName(), getPartitionColumnTypes());
+ BiMap<Long, String> idToName =
hivePartitionValues.getPartitionNameToIdMap().inverse();
+ if (MapUtils.isEmpty(idToName)) {
+ throw new AnalysisException("partitions is empty for : " +
getName());
+ }
+ for (Long partitionId : idToName.keySet()) {
+ visibleVersionTime =
getHivePartitionByIdOrAnalysisException(partitionId, hivePartitionValues,
+ cache).getLastModifiedTime();
if (visibleVersionTime > maxVersionTime) {
maxVersionTime = visibleVersionTime;
- partitionName = entry.getKey();
+ maxPartitionId = partitionId;
}
}
- return new MTMVMaxTimestampSnapshot(partitionName, maxVersionTime);
+ return new MTMVMaxTimestampSnapshot(idToName.get(maxPartitionId),
maxVersionTime);
}
- private long getPartitionLastModifyTime(String partitionName) throws
AnalysisException {
- return getPartitionByName(partitionName).getLastModifiedTime();
- }
-
- private HivePartition getPartitionByName(String partitionName) throws
AnalysisException {
- PartitionItem item = getAndCopyPartitionItems().get(partitionName);
- List<List<String>> partitionValuesList =
transferPartitionItemToPartitionValues(item);
- List<HivePartition> partitions =
getPartitionsByPartitionValues(partitionValuesList);
- if (partitions.size() != 1) {
- throw new AnalysisException("partition not normal, size: " +
partitions.size());
+ private Long getPartitionIdByNameOrAnalysisException(String partitionName,
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues)
+ throws AnalysisException {
+ Long partitionId =
hivePartitionValues.getPartitionNameToIdMap().get(partitionName);
+ if (partitionId == null) {
+ throw new AnalysisException("can not find partition: " +
partitionName);
}
- return partitions.get(0);
+ return partitionId;
}
- private List<HivePartition>
getPartitionsByPartitionValues(List<List<String>> partitionValuesList) {
- HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
- .getMetaStoreCache((HMSExternalCatalog) getCatalog());
- return cache.getAllPartitionsWithCache(getDbName(), getName(),
- partitionValuesList);
- }
-
- private List<List<String>>
transferPartitionItemToPartitionValues(PartitionItem item) {
- List<List<String>> partitionValuesList =
Lists.newArrayListWithCapacity(1);
- partitionValuesList.add(
- ((ListPartitionItem)
item).getItems().get(0).getPartitionValuesAsStringListForHive());
- return partitionValuesList;
+ private HivePartition getHivePartitionByIdOrAnalysisException(Long
partitionId,
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues,
+ HiveMetaStoreCache cache) throws AnalysisException {
+ List<String> partitionValues =
hivePartitionValues.getPartitionValuesMap().get(partitionId);
+ if (CollectionUtils.isEmpty(partitionValues)) {
+ throw new AnalysisException("can not find partitionValues: " +
partitionId);
+ }
+ HivePartition partition = cache.getHivePartition(getDbName(),
getName(), partitionValues);
+ if (partition == null) {
+ throw new AnalysisException("can not find partition: " +
partitionId);
+ }
+ return partition;
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index b87c14afbc8..99338fb87ad 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -521,6 +521,10 @@ public class HiveMetaStoreCache {
return fileLists;
}
+ public HivePartition getHivePartition(String dbName, String name,
List<String> partitionValues) {
+ return partitionCache.get(new PartitionCacheKey(dbName, name,
partitionValues));
+ }
+
public List<HivePartition> getAllPartitionsWithCache(String dbName, String
name,
List<List<String>> partitionValuesList) {
return getAllPartitions(dbName, name, partitionValuesList, true);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]