This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 5c650d92eab branch-3.1: [opt](hive) use binary search to prune hive
partitions #58877 (#58932)
5c650d92eab is described below
commit 5c650d92eabd64fba0077f28bd8e1022c0d163cc
Author: zy-kkk <[email protected]>
AuthorDate: Wed Dec 17 17:09:16 2025 +0800
branch-3.1: [opt](hive) use binary search to prune hive partitions #58877
(#58932)
pick #58877
---
.../cache/NereidsSortedPartitionsCacheManager.java | 39 +-----
.../org/apache/doris/datasource/ExternalTable.java | 14 ++
.../doris/datasource/hive/HMSExternalTable.java | 15 +++
.../doris/datasource/hive/HiveMetaStoreCache.java | 143 +++++++--------------
.../expression/rules/SortedPartitionRanges.java | 44 +++++++
.../rules/rewrite/PruneFileScanPartition.java | 12 +-
.../apache/doris/datasource/CatalogMgrTest.java | 24 +---
7 files changed, 130 insertions(+), 161 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
index c7674113014..e839c566036 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
@@ -24,11 +24,7 @@ import
org.apache.doris.catalog.SupportBinarySearchFilteringPartitions;
import org.apache.doris.common.Config;
import org.apache.doris.common.ConfigBase.DefaultConfHandler;
import org.apache.doris.datasource.CatalogIf;
-import org.apache.doris.nereids.rules.expression.rules.MultiColumnBound;
-import org.apache.doris.nereids.rules.expression.rules.PartitionItemToRange;
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
-import
org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndId;
-import
org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndRange;
import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.rpc.RpcException;
@@ -36,18 +32,14 @@ import org.apache.doris.rpc.RpcException;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Range;
import lombok.AllArgsConstructor;
import lombok.Data;
-import org.apache.hadoop.util.Lists;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.lang.reflect.Field;
import java.time.Duration;
-import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
@@ -123,35 +115,10 @@ public class NereidsSortedPartitionsCacheManager {
}
Map<?, PartitionItem> unsortedMap = table.getOriginPartitions(scan);
- List<Entry<?, PartitionItem>> unsortedList =
Lists.newArrayList(unsortedMap.entrySet());
- List<PartitionItemAndRange<?>> sortedRanges =
Lists.newArrayListWithCapacity(unsortedMap.size());
- List<PartitionItemAndId<?>> defaultPartitions = Lists.newArrayList();
- for (Entry<?, PartitionItem> entry : unsortedList) {
- PartitionItem partitionItem = entry.getValue();
- Object id = entry.getKey();
- if (!partitionItem.isDefaultPartition()) {
- List<Range<MultiColumnBound>> ranges =
PartitionItemToRange.toRanges(partitionItem);
- for (Range<MultiColumnBound> range : ranges) {
- sortedRanges.add(new PartitionItemAndRange<>(id,
partitionItem, range));
- }
- } else {
- defaultPartitions.add(new PartitionItemAndId<>(id,
partitionItem));
- }
+ SortedPartitionRanges<?> sortedPartitionRanges =
SortedPartitionRanges.build(unsortedMap);
+ if (sortedPartitionRanges == null) {
+ return null;
}
-
- sortedRanges.sort((o1, o2) -> {
- Range<MultiColumnBound> span1 = o1.range;
- Range<MultiColumnBound> span2 = o2.range;
- int result =
span1.lowerEndpoint().compareTo(span2.lowerEndpoint());
- if (result != 0) {
- return result;
- }
- result = span1.upperEndpoint().compareTo(span2.upperEndpoint());
- return result;
- });
- SortedPartitionRanges<?> sortedPartitionRanges = new
SortedPartitionRanges(
- sortedRanges, defaultPartitions
- );
PartitionCacheContext context = new PartitionCacheContext(
table.getId(), table.getPartitionMetaVersion(scan),
sortedPartitionRanges);
partitionCaches.put(key, context);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
index 12887cff1d9..8015337c06f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
@@ -33,6 +33,8 @@ import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.ExternalSchemaCache.SchemaCacheKey;
import org.apache.doris.datasource.mvcc.MvccSnapshot;
+import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
@@ -453,6 +455,18 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
return false;
}
+ /**
+ * Get sorted partition ranges for binary search filtering.
+ * Subclasses can override this method to provide sorted partition ranges
+ * for efficient partition pruning.
+ *
+ * @param scan the catalog relation
+ * @return sorted partition ranges, or empty if not supported
+ */
+ public Optional<SortedPartitionRanges<String>>
getSortedPartitionRanges(CatalogRelation scan) {
+ return Optional.empty();
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index e7e2125f41d..f6c195ad529 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -55,6 +55,8 @@ import org.apache.doris.mtmv.MTMVRefreshContext;
import org.apache.doris.mtmv.MTMVRelatedTableIf;
import org.apache.doris.mtmv.MTMVSnapshotIf;
import org.apache.doris.nereids.exceptions.NotSupportedException;
+import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.qe.GlobalVariable;
import org.apache.doris.statistics.AnalysisInfo;
@@ -360,6 +362,19 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI;
}
+ @Override
+ public Optional<SortedPartitionRanges<String>>
getSortedPartitionRanges(CatalogRelation scan) {
+ if (getDlaType() != DLAType.HIVE) {
+ return Optional.empty();
+ }
+ if (CollectionUtils.isEmpty(this.getPartitionColumns())) {
+ return Optional.empty();
+ }
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
getHivePartitionValues(
+ MvccUtil.getSnapshotFromContext(this));
+ return hivePartitionValues.getSortedPartitionRanges();
+ }
+
public SelectedPartitions
initHudiSelectedPartitions(Optional<TableSnapshot> tableSnapshot) {
if (getDlaType() != DLAType.HUDI) {
return SelectedPartitions.NOT_PRUNED;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index c0dbf68495e..d1d6f6fc563 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -53,9 +53,8 @@ import org.apache.doris.metric.GaugeMetric;
import org.apache.doris.metric.Metric;
import org.apache.doris.metric.MetricLabel;
import org.apache.doris.metric.MetricRepo;
-import org.apache.doris.planner.ColumnBound;
+import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
import org.apache.doris.planner.ListPartitionPrunerV2;
-import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
import com.github.benmanes.caffeine.cache.CacheLoader;
import com.github.benmanes.caffeine.cache.LoadingCache;
@@ -67,10 +66,7 @@ import com.google.common.collect.HashBiMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import com.google.common.collect.Range;
-import com.google.common.collect.RangeMap;
import com.google.common.collect.Streams;
-import com.google.common.collect.TreeRangeMap;
import lombok.Data;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.conf.Configuration;
@@ -260,7 +256,6 @@ public class HiveMetaStoreCache {
}
Map<Long, PartitionItem> idToPartitionItem =
Maps.newHashMapWithExpectedSize(partitionNames.size());
BiMap<String, Long> partitionNameToIdMap =
HashBiMap.create(partitionNames.size());
- Map<Long, List<UniqueId>> idToUniqueIdsMap =
Maps.newHashMapWithExpectedSize(partitionNames.size());
for (String partitionName : partitionNames) {
long partitionId = Util.genIdByName(catalog.getName(),
nameMapping.getLocalDbName(),
nameMapping.getLocalTblName(), partitionName);
@@ -269,23 +264,8 @@ public class HiveMetaStoreCache {
partitionNameToIdMap.put(partitionName, partitionId);
}
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = null;
- Map<Range<PartitionKey>, UniqueId> rangeToId = null;
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = null;
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap = null;
- if (key.types.size() > 1) {
- // uidToPartitionRange and rangeToId are only used for
multi-column partition
- uidToPartitionRange =
ListPartitionPrunerV2.genUidToPartitionRange(idToPartitionItem,
idToUniqueIdsMap);
- rangeToId =
ListPartitionPrunerV2.genRangeToId(uidToPartitionRange);
- } else {
- Preconditions.checkState(key.types.size() == 1, key.types);
- // singleColumnRangeMap is only used for single-column partition
- singleColumnRangeMap =
ListPartitionPrunerV2.genSingleColumnRangeMap(idToPartitionItem,
idToUniqueIdsMap);
- singleUidToColumnRangeMap =
ListPartitionPrunerV2.genSingleUidToColumnRange(singleColumnRangeMap);
- }
Map<Long, List<String>> partitionValuesMap =
ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
- return new HivePartitionValues(idToPartitionItem, uidToPartitionRange,
rangeToId, singleColumnRangeMap,
- partitionNameToIdMap, idToUniqueIdsMap,
singleUidToColumnRangeMap, partitionValuesMap);
+ return new HivePartitionValues(idToPartitionItem,
partitionNameToIdMap, partitionValuesMap);
}
@VisibleForTesting
@@ -677,7 +657,6 @@ public class HiveMetaStoreCache {
HivePartitionValues copy = partitionValues.copy();
Map<Long, PartitionItem> idToPartitionItemBefore =
copy.getIdToPartitionItem();
Map<String, Long> partitionNameToIdMapBefore =
copy.getPartitionNameToIdMap();
- Map<Long, List<UniqueId>> idToUniqueIdsMap =
copy.getIdToUniqueIdsMap();
Map<Long, PartitionItem> idToPartitionItem = new HashMap<>();
String localDbName = nameMapping.getLocalDbName();
String localTblName = nameMapping.getLocalTblName();
@@ -695,28 +674,8 @@ public class HiveMetaStoreCache {
Map<Long, List<String>> partitionValuesMapBefore =
copy.getPartitionValuesMap();
Map<Long, List<String>> partitionValuesMap =
ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
partitionValuesMapBefore.putAll(partitionValuesMap);
- if (key.types.size() > 1) {
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRangeBefore =
copy.getUidToPartitionRange();
- // uidToPartitionRange and rangeToId are only used for
multi-column partition
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange =
ListPartitionPrunerV2
- .genUidToPartitionRange(idToPartitionItem,
idToUniqueIdsMap);
- uidToPartitionRangeBefore.putAll(uidToPartitionRange);
- Map<Range<PartitionKey>, UniqueId> rangeToIdBefore =
copy.getRangeToId();
- Map<Range<PartitionKey>, UniqueId> rangeToId =
ListPartitionPrunerV2.genRangeToId(uidToPartitionRange);
- rangeToIdBefore.putAll(rangeToId);
- } else {
- Preconditions.checkState(key.types.size() == 1, key.types);
- // singleColumnRangeMap is only used for single-column partition
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMapBefore =
copy.getSingleColumnRangeMap();
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap =
ListPartitionPrunerV2
- .genSingleColumnRangeMap(idToPartitionItem,
idToUniqueIdsMap);
- singleColumnRangeMapBefore.putAll(singleColumnRangeMap);
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMapBefore
= copy
- .getSingleUidToColumnRangeMap();
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap =
ListPartitionPrunerV2
- .genSingleUidToColumnRange(singleColumnRangeMap);
- singleUidToColumnRangeMapBefore.putAll(singleUidToColumnRangeMap);
- }
+ // Rebuild sorted partition ranges after adding partitions
+ copy.rebuildSortedPartitionRanges();
HivePartitionValues partitionValuesCur =
partitionValuesCache.getIfPresent(key);
if (partitionValuesCur == partitionValues) {
partitionValuesCache.put(key, copy);
@@ -734,11 +693,6 @@ public class HiveMetaStoreCache {
HivePartitionValues copy = partitionValues.copy();
Map<String, Long> partitionNameToIdMapBefore =
copy.getPartitionNameToIdMap();
Map<Long, PartitionItem> idToPartitionItemBefore =
copy.getIdToPartitionItem();
- Map<Long, List<UniqueId>> idToUniqueIdsMapBefore =
copy.getIdToUniqueIdsMap();
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRangeBefore =
copy.getUidToPartitionRange();
- Map<Range<PartitionKey>, UniqueId> rangeToIdBefore =
copy.getRangeToId();
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMapBefore =
copy.getSingleColumnRangeMap();
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMapBefore =
copy.getSingleUidToColumnRangeMap();
Map<Long, List<String>> partitionValuesMap =
copy.getPartitionValuesMap();
for (String partitionName : partitionNames) {
if (!partitionNameToIdMapBefore.containsKey(partitionName)) {
@@ -749,27 +703,13 @@ public class HiveMetaStoreCache {
Long partitionId =
partitionNameToIdMapBefore.remove(partitionName);
idToPartitionItemBefore.remove(partitionId);
partitionValuesMap.remove(partitionId);
- List<UniqueId> uniqueIds =
idToUniqueIdsMapBefore.remove(partitionId);
- for (UniqueId uniqueId : uniqueIds) {
- if (uidToPartitionRangeBefore != null) {
- Range<PartitionKey> range =
uidToPartitionRangeBefore.remove(uniqueId);
- if (range != null) {
- rangeToIdBefore.remove(range);
- }
- }
-
- if (singleUidToColumnRangeMapBefore != null) {
- Range<ColumnBound> range =
singleUidToColumnRangeMapBefore.remove(uniqueId);
- if (range != null) {
- singleColumnRangeMapBefore.remove(range);
- }
- }
- }
if (invalidPartitionCache) {
invalidatePartitionCache(dorisTable, partitionName);
}
}
+ // Rebuild sorted partition ranges after dropping partitions
+ copy.rebuildSortedPartitionRanges();
HivePartitionValues partitionValuesCur =
partitionValuesCache.getIfPresent(key);
if (partitionValuesCur == partitionValues) {
partitionValuesCache.put(key, copy);
@@ -1044,7 +984,7 @@ public class HiveMetaStoreCache {
return dummyKey == ((FileCacheKey) obj).dummyKey;
}
return location.equals(((FileCacheKey) obj).location)
- && Objects.equals(partitionValues, ((FileCacheKey)
obj).partitionValues);
+ && Objects.equals(partitionValues, ((FileCacheKey)
obj).partitionValues);
}
boolean isSameTable(long id) {
@@ -1142,54 +1082,69 @@ public class HiveMetaStoreCache {
@Data
public static class HivePartitionValues {
private BiMap<String, Long> partitionNameToIdMap;
- private Map<Long, List<UniqueId>> idToUniqueIdsMap;
private Map<Long, PartitionItem> idToPartitionItem;
private Map<Long, List<String>> partitionValuesMap;
- //multi pair
- private Map<UniqueId, Range<PartitionKey>> uidToPartitionRange;
- private Map<Range<PartitionKey>, UniqueId> rangeToId;
- //single pair
- private RangeMap<ColumnBound, UniqueId> singleColumnRangeMap;
- private Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap;
+
+ // Sorted partition ranges for binary search filtering.
+ // Built at construction time, shares the same lifecycle with
HivePartitionValues.
+ private SortedPartitionRanges<String> sortedPartitionRanges;
public HivePartitionValues() {
}
public HivePartitionValues(Map<Long, PartitionItem> idToPartitionItem,
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange,
- Map<Range<PartitionKey>, UniqueId> rangeToId,
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap,
BiMap<String, Long> partitionNameToIdMap,
- Map<Long, List<UniqueId>> idToUniqueIdsMap,
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap,
Map<Long, List<String>> partitionValuesMap) {
this.idToPartitionItem = idToPartitionItem;
- this.uidToPartitionRange = uidToPartitionRange;
- this.rangeToId = rangeToId;
- this.singleColumnRangeMap = singleColumnRangeMap;
this.partitionNameToIdMap = partitionNameToIdMap;
- this.idToUniqueIdsMap = idToUniqueIdsMap;
- this.singleUidToColumnRangeMap = singleUidToColumnRangeMap;
this.partitionValuesMap = partitionValuesMap;
+ this.sortedPartitionRanges = buildSortedPartitionRanges();
}
+ /**
+ * Create a copy for incremental updates (add/drop partitions).
+ * The sortedPartitionRanges will be rebuilt after the caller modifies
the partition data.
+ */
public HivePartitionValues copy() {
HivePartitionValues copy = new HivePartitionValues();
copy.setPartitionNameToIdMap(partitionNameToIdMap == null ? null :
HashBiMap.create(partitionNameToIdMap));
- copy.setIdToUniqueIdsMap(idToUniqueIdsMap == null ? null :
Maps.newHashMap(idToUniqueIdsMap));
copy.setIdToPartitionItem(idToPartitionItem == null ? null :
Maps.newHashMap(idToPartitionItem));
copy.setPartitionValuesMap(partitionValuesMap == null ? null :
Maps.newHashMap(partitionValuesMap));
- copy.setUidToPartitionRange(uidToPartitionRange == null ? null :
Maps.newHashMap(uidToPartitionRange));
- copy.setRangeToId(rangeToId == null ? null :
Maps.newHashMap(rangeToId));
- copy.setSingleUidToColumnRangeMap(
- singleUidToColumnRangeMap == null ? null :
Maps.newHashMap(singleUidToColumnRangeMap));
- if (singleColumnRangeMap != null) {
- RangeMap<ColumnBound, UniqueId> copySingleColumnRangeMap =
TreeRangeMap.create();
- copySingleColumnRangeMap.putAll(singleColumnRangeMap);
- copy.setSingleColumnRangeMap(copySingleColumnRangeMap);
- }
+ // sortedPartitionRanges is not copied here, caller should call
rebuildSortedPartitionRanges()
+ // after modifying partition data
return copy;
}
+
+ /**
+ * Rebuild sorted partition ranges after incremental updates.
+ * Should be called after add/drop partitions.
+ */
+ public void rebuildSortedPartitionRanges() {
+ this.sortedPartitionRanges = buildSortedPartitionRanges();
+ }
+
+ /**
+ * Get sorted partition ranges for binary search filtering.
+ */
+ public Optional<SortedPartitionRanges<String>>
getSortedPartitionRanges() {
+ return Optional.ofNullable(sortedPartitionRanges);
+ }
+
+ private SortedPartitionRanges<String> buildSortedPartitionRanges() {
+ if (partitionNameToIdMap == null ||
partitionNameToIdMap.isEmpty()) {
+ return null;
+ }
+
+ // Build name to partition item map for
SortedPartitionRanges.build
+ BiMap<Long, String> idToName = partitionNameToIdMap.inverse();
+ Map<String, PartitionItem> nameToPartitionItem =
Maps.newHashMapWithExpectedSize(idToPartitionItem.size());
+ for (Map.Entry<Long, PartitionItem> entry :
idToPartitionItem.entrySet()) {
+ String partitionName = idToName.get(entry.getKey());
+ nameToPartitionItem.put(partitionName, entry.getValue());
+ }
+
+ return SortedPartitionRanges.build(nameToPartitionItem);
+ }
}
/**
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
index 50d4cb3befa..0c4a1b034b5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
@@ -20,9 +20,11 @@ package org.apache.doris.nereids.rules.expression.rules;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.nereids.util.Utils;
+import com.google.common.collect.Lists;
import com.google.common.collect.Range;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
/** SortedPartitionRanges */
@@ -41,6 +43,48 @@ public class SortedPartitionRanges<K> {
);
}
+ /**
+ * Build SortedPartitionRanges from a partition map.
+ * This method extracts the common logic for building sorted partition
ranges
+ * from both NereidsSortedPartitionsCacheManager and HiveMetaStoreCache.
+ *
+ * @param partitionMap map of partition id to partition item
+ * @return SortedPartitionRanges or null if the map is null or empty
+ */
+ public static <K> SortedPartitionRanges<K> build(Map<K, PartitionItem>
partitionMap) {
+ if (partitionMap == null || partitionMap.isEmpty()) {
+ return null;
+ }
+
+ List<PartitionItemAndRange<K>> sortedRanges =
Lists.newArrayListWithCapacity(partitionMap.size());
+ List<PartitionItemAndId<K>> defaultPartitions = Lists.newArrayList();
+
+ for (Map.Entry<K, PartitionItem> entry : partitionMap.entrySet()) {
+ PartitionItem partitionItem = entry.getValue();
+ K id = entry.getKey();
+ if (!partitionItem.isDefaultPartition()) {
+ List<Range<MultiColumnBound>> ranges =
PartitionItemToRange.toRanges(partitionItem);
+ for (Range<MultiColumnBound> range : ranges) {
+ sortedRanges.add(new PartitionItemAndRange<>(id,
partitionItem, range));
+ }
+ } else {
+ defaultPartitions.add(new PartitionItemAndId<>(id,
partitionItem));
+ }
+ }
+
+ sortedRanges.sort((o1, o2) -> {
+ Range<MultiColumnBound> span1 = o1.range;
+ Range<MultiColumnBound> span2 = o2.range;
+ int result =
span1.lowerEndpoint().compareTo(span2.lowerEndpoint());
+ if (result != 0) {
+ return result;
+ }
+ return span1.upperEndpoint().compareTo(span2.upperEndpoint());
+ });
+
+ return new SortedPartitionRanges<>(sortedRanges, defaultPartitions);
+ }
+
/** PartitionItemAndRange */
public static class PartitionItemAndRange<K> {
public final K id;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
index 0fb8e68c486..7635ac803b5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
@@ -17,10 +17,7 @@
package org.apache.doris.nereids.rules.rewrite;
-import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.PartitionItem;
-import org.apache.doris.catalog.SupportBinarySearchFilteringPartitions;
-import org.apache.doris.common.cache.NereidsSortedPartitionsCacheManager;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.Rule;
@@ -94,11 +91,10 @@ public class PruneFileScanPartition extends
OneRewriteRuleFactory {
Map<String, PartitionItem> nameToPartitionItem =
scan.getSelectedPartitions().selectedPartitions;
Optional<SortedPartitionRanges<String>> sortedPartitionRanges =
Optional.empty();
- if (externalTable instanceof SupportBinarySearchFilteringPartitions) {
- NereidsSortedPartitionsCacheManager partitionsCacheManager =
Env.getCurrentEnv()
- .getSortedPartitionsCacheManager();
- sortedPartitionRanges = (Optional) partitionsCacheManager.get(
- (SupportBinarySearchFilteringPartitions)
externalTable, scan);
+ boolean enableBinarySearch = ctx.getConnectContext() == null
+ ||
ctx.getConnectContext().getSessionVariable().enableBinarySearchFilteringPartitions;
+ if (enableBinarySearch) {
+ sortedPartitionRanges = (Optional)
externalTable.getSortedPartitionRanges(scan);
}
List<String> prunedPartitions = new ArrayList<>(PartitionPruner.prune(
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java
index 2a7422cc55d..2b6fa18828f 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java
@@ -34,7 +34,6 @@ import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.EsResource;
import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.PartitionItem;
-import org.apache.doris.catalog.PartitionKey;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ResourceMgr;
import org.apache.doris.catalog.Type;
@@ -54,21 +53,16 @@ import
org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
import org.apache.doris.datasource.hive.HiveMetaStoreCache.HivePartitionValues;
import
org.apache.doris.datasource.hive.HiveMetaStoreCache.PartitionValueCacheKey;
import org.apache.doris.mysql.privilege.Auth;
-import org.apache.doris.planner.ColumnBound;
import org.apache.doris.planner.ListPartitionPrunerV2;
-import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSet;
import org.apache.doris.utframe.TestWithFeService;
import com.github.benmanes.caffeine.cache.LoadingCache;
-import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import com.google.common.collect.Range;
-import com.google.common.collect.RangeMap;
import org.junit.Assert;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -532,7 +526,6 @@ public class CatalogMgrTest extends TestWithFeService {
// partition name format: nation=cn/city=beijing
Map<Long, PartitionItem> idToPartitionItem =
Maps.newHashMapWithExpectedSize(partitionNames.size());
BiMap<String, Long> partitionNameToIdMap =
HashBiMap.create(partitionNames.size());
- Map<Long, List<UniqueId>> idToUniqueIdsMap =
Maps.newHashMapWithExpectedSize(partitionNames.size());
long idx = 0;
for (String partitionName : partitionNames) {
long partitionId = idx++;
@@ -541,23 +534,8 @@ public class CatalogMgrTest extends TestWithFeService {
partitionNameToIdMap.put(partitionName, partitionId);
}
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = null;
- Map<Range<PartitionKey>, UniqueId> rangeToId = null;
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = null;
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap = null;
- if (key.getTypes().size() > 1) {
- // uidToPartitionRange and rangeToId are only used for
multi-column partition
- uidToPartitionRange =
ListPartitionPrunerV2.genUidToPartitionRange(idToPartitionItem,
idToUniqueIdsMap);
- rangeToId =
ListPartitionPrunerV2.genRangeToId(uidToPartitionRange);
- } else {
- Preconditions.checkState(key.getTypes().size() == 1,
key.getTypes());
- // singleColumnRangeMap is only used for single-column partition
- singleColumnRangeMap =
ListPartitionPrunerV2.genSingleColumnRangeMap(idToPartitionItem,
idToUniqueIdsMap);
- singleUidToColumnRangeMap =
ListPartitionPrunerV2.genSingleUidToColumnRange(singleColumnRangeMap);
- }
Map<Long, List<String>> partitionValuesMap =
ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
- return new HivePartitionValues(idToPartitionItem, uidToPartitionRange,
rangeToId, singleColumnRangeMap,
- partitionNameToIdMap, idToUniqueIdsMap,
singleUidToColumnRangeMap, partitionValuesMap);
+ return new HivePartitionValues(idToPartitionItem,
partitionNameToIdMap, partitionValuesMap);
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org