This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 87dc631d425 [opt](hive) use binary search to prune hive partitions
(#58877)
87dc631d425 is described below
commit 87dc631d425df03ce08f4ffcf768643cb86c3474
Author: zy-kkk <[email protected]>
AuthorDate: Thu Dec 11 00:12:03 2025 +0800
[opt](hive) use binary search to prune hive partitions (#58877)
Follow-up to #44586
Enable binary search partition pruning optimization for Hive external
tables.
This PR adds binary search partition pruning support for Hive tables by:
- Adding `getSortedPartitionRanges()` method to `ExternalTable` base
class
- Maintaining sorted partition ranges directly in `HivePartitionValues`
for cache lifecycle consistency
- Overriding `getSortedPartitionRanges()` in `HMSExternalTable` to
provide sorted ranges
**Performance improvement (20000 partitions, 1000 queries):**
- Binary search enabled: **4.548 seconds**
- Binary search disabled: **12.849 seconds**
- **~2.8x faster**
---
.../cache/NereidsSortedPartitionsCacheManager.java | 39 +-----
.../org/apache/doris/datasource/ExternalTable.java | 14 ++
.../doris/datasource/hive/HMSExternalTable.java | 15 +++
.../doris/datasource/hive/HiveMetaStoreCache.java | 143 +++++++--------------
.../expression/rules/SortedPartitionRanges.java | 44 +++++++
.../rules/rewrite/PruneFileScanPartition.java | 12 +-
6 files changed, 129 insertions(+), 138 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
index aee8122d7f4..43a14834f55 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
@@ -24,11 +24,7 @@ import
org.apache.doris.catalog.SupportBinarySearchFilteringPartitions;
import org.apache.doris.common.Config;
import org.apache.doris.common.ConfigBase.DefaultConfHandler;
import org.apache.doris.datasource.CatalogIf;
-import org.apache.doris.nereids.rules.expression.rules.MultiColumnBound;
-import org.apache.doris.nereids.rules.expression.rules.PartitionItemToRange;
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
-import
org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndId;
-import
org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndRange;
import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
@@ -37,18 +33,14 @@ import org.apache.doris.rpc.RpcException;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Range;
import lombok.AllArgsConstructor;
import lombok.Data;
-import org.apache.hadoop.util.Lists;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.lang.reflect.Field;
import java.time.Duration;
-import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
@@ -134,35 +126,10 @@ public class NereidsSortedPartitionsCacheManager {
}
Map<?, PartitionItem> unsortedMap = table.getOriginPartitions(scan);
- List<Entry<?, PartitionItem>> unsortedList =
Lists.newArrayList(unsortedMap.entrySet());
- List<PartitionItemAndRange<?>> sortedRanges =
Lists.newArrayListWithCapacity(unsortedMap.size());
- List<PartitionItemAndId<?>> defaultPartitions = Lists.newArrayList();
- for (Entry<?, PartitionItem> entry : unsortedList) {
- PartitionItem partitionItem = entry.getValue();
- Object id = entry.getKey();
- if (!partitionItem.isDefaultPartition()) {
- List<Range<MultiColumnBound>> ranges =
PartitionItemToRange.toRanges(partitionItem);
- for (Range<MultiColumnBound> range : ranges) {
- sortedRanges.add(new PartitionItemAndRange<>(id,
partitionItem, range));
- }
- } else {
- defaultPartitions.add(new PartitionItemAndId<>(id,
partitionItem));
- }
+ SortedPartitionRanges<?> sortedPartitionRanges =
SortedPartitionRanges.build(unsortedMap);
+ if (sortedPartitionRanges == null) {
+ return null;
}
-
- sortedRanges.sort((o1, o2) -> {
- Range<MultiColumnBound> span1 = o1.range;
- Range<MultiColumnBound> span2 = o2.range;
- int result =
span1.lowerEndpoint().compareTo(span2.lowerEndpoint());
- if (result != 0) {
- return result;
- }
- result = span1.upperEndpoint().compareTo(span2.upperEndpoint());
- return result;
- });
- SortedPartitionRanges<?> sortedPartitionRanges = new
SortedPartitionRanges(
- sortedRanges, defaultPartitions
- );
PartitionCacheContext context = new PartitionCacheContext(
table.getId(), table.getPartitionMetaVersion(scan),
sortedPartitionRanges);
partitionCaches.put(key, context);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
index a1d2d8eed88..cf74ef2fb54 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
@@ -33,6 +33,8 @@ import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.ExternalSchemaCache.SchemaCacheKey;
import org.apache.doris.datasource.mvcc.MvccSnapshot;
+import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
@@ -453,6 +455,18 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
return false;
}
+ /**
+ * Get sorted partition ranges for binary search filtering.
+ * Subclasses can override this method to provide sorted partition ranges
+ * for efficient partition pruning.
+ *
+ * @param scan the catalog relation
+ * @return sorted partition ranges, or empty if not supported
+ */
+ public Optional<SortedPartitionRanges<String>>
getSortedPartitionRanges(CatalogRelation scan) {
+ return Optional.empty();
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 2b64a7cd6e2..5fb1b791ab6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -56,6 +56,8 @@ import org.apache.doris.mtmv.MTMVRefreshContext;
import org.apache.doris.mtmv.MTMVRelatedTableIf;
import org.apache.doris.mtmv.MTMVSnapshotIf;
import org.apache.doris.nereids.exceptions.NotSupportedException;
+import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.qe.GlobalVariable;
import org.apache.doris.qe.SessionVariable;
@@ -388,6 +390,19 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI;
}
+ @Override
+ public Optional<SortedPartitionRanges<String>>
getSortedPartitionRanges(CatalogRelation scan) {
+ if (getDlaType() != DLAType.HIVE) {
+ return Optional.empty();
+ }
+ if (CollectionUtils.isEmpty(this.getPartitionColumns())) {
+ return Optional.empty();
+ }
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
getHivePartitionValues(
+ MvccUtil.getSnapshotFromContext(this));
+ return hivePartitionValues.getSortedPartitionRanges();
+ }
+
public SelectedPartitions
initHudiSelectedPartitions(Optional<TableSnapshot> tableSnapshot) {
if (getDlaType() != DLAType.HUDI) {
return SelectedPartitions.NOT_PRUNED;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index c6ceb2f4f20..e560099a18d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -52,9 +52,8 @@ import org.apache.doris.metric.GaugeMetric;
import org.apache.doris.metric.Metric;
import org.apache.doris.metric.MetricLabel;
import org.apache.doris.metric.MetricRepo;
-import org.apache.doris.planner.ColumnBound;
+import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
import org.apache.doris.planner.ListPartitionPrunerV2;
-import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
import com.github.benmanes.caffeine.cache.CacheLoader;
import com.github.benmanes.caffeine.cache.LoadingCache;
@@ -66,10 +65,7 @@ import com.google.common.collect.HashBiMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import com.google.common.collect.Range;
-import com.google.common.collect.RangeMap;
import com.google.common.collect.Streams;
-import com.google.common.collect.TreeRangeMap;
import lombok.Data;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.fs.BlockLocation;
@@ -251,7 +247,6 @@ public class HiveMetaStoreCache {
}
Map<Long, PartitionItem> idToPartitionItem =
Maps.newHashMapWithExpectedSize(partitionNames.size());
BiMap<String, Long> partitionNameToIdMap =
HashBiMap.create(partitionNames.size());
- Map<Long, List<UniqueId>> idToUniqueIdsMap =
Maps.newHashMapWithExpectedSize(partitionNames.size());
for (String partitionName : partitionNames) {
long partitionId = Util.genIdByName(catalog.getName(),
nameMapping.getLocalDbName(),
nameMapping.getLocalTblName(), partitionName);
@@ -260,23 +255,8 @@ public class HiveMetaStoreCache {
partitionNameToIdMap.put(partitionName, partitionId);
}
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = null;
- Map<Range<PartitionKey>, UniqueId> rangeToId = null;
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = null;
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap = null;
- if (key.types.size() > 1) {
- // uidToPartitionRange and rangeToId are only used for
multi-column partition
- uidToPartitionRange =
ListPartitionPrunerV2.genUidToPartitionRange(idToPartitionItem,
idToUniqueIdsMap);
- rangeToId =
ListPartitionPrunerV2.genRangeToId(uidToPartitionRange);
- } else {
- Preconditions.checkState(key.types.size() == 1, key.types);
- // singleColumnRangeMap is only used for single-column partition
- singleColumnRangeMap =
ListPartitionPrunerV2.genSingleColumnRangeMap(idToPartitionItem,
idToUniqueIdsMap);
- singleUidToColumnRangeMap =
ListPartitionPrunerV2.genSingleUidToColumnRange(singleColumnRangeMap);
- }
Map<Long, List<String>> partitionValuesMap =
ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
- return new HivePartitionValues(idToPartitionItem, uidToPartitionRange,
rangeToId, singleColumnRangeMap,
- partitionNameToIdMap, idToUniqueIdsMap,
singleUidToColumnRangeMap, partitionValuesMap);
+ return new HivePartitionValues(idToPartitionItem,
partitionNameToIdMap, partitionValuesMap);
}
private ListPartitionItem toListPartitionItem(String partitionName,
List<Type> types) {
@@ -661,7 +641,6 @@ public class HiveMetaStoreCache {
HivePartitionValues copy = partitionValues.copy();
Map<Long, PartitionItem> idToPartitionItemBefore =
copy.getIdToPartitionItem();
Map<String, Long> partitionNameToIdMapBefore =
copy.getPartitionNameToIdMap();
- Map<Long, List<UniqueId>> idToUniqueIdsMap =
copy.getIdToUniqueIdsMap();
Map<Long, PartitionItem> idToPartitionItem = new HashMap<>();
String localDbName = nameMapping.getLocalDbName();
String localTblName = nameMapping.getLocalTblName();
@@ -679,28 +658,8 @@ public class HiveMetaStoreCache {
Map<Long, List<String>> partitionValuesMapBefore =
copy.getPartitionValuesMap();
Map<Long, List<String>> partitionValuesMap =
ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
partitionValuesMapBefore.putAll(partitionValuesMap);
- if (key.types.size() > 1) {
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRangeBefore =
copy.getUidToPartitionRange();
- // uidToPartitionRange and rangeToId are only used for
multi-column partition
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange =
ListPartitionPrunerV2
- .genUidToPartitionRange(idToPartitionItem,
idToUniqueIdsMap);
- uidToPartitionRangeBefore.putAll(uidToPartitionRange);
- Map<Range<PartitionKey>, UniqueId> rangeToIdBefore =
copy.getRangeToId();
- Map<Range<PartitionKey>, UniqueId> rangeToId =
ListPartitionPrunerV2.genRangeToId(uidToPartitionRange);
- rangeToIdBefore.putAll(rangeToId);
- } else {
- Preconditions.checkState(key.types.size() == 1, key.types);
- // singleColumnRangeMap is only used for single-column partition
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMapBefore =
copy.getSingleColumnRangeMap();
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap =
ListPartitionPrunerV2
- .genSingleColumnRangeMap(idToPartitionItem,
idToUniqueIdsMap);
- singleColumnRangeMapBefore.putAll(singleColumnRangeMap);
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMapBefore
= copy
- .getSingleUidToColumnRangeMap();
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap =
ListPartitionPrunerV2
- .genSingleUidToColumnRange(singleColumnRangeMap);
- singleUidToColumnRangeMapBefore.putAll(singleUidToColumnRangeMap);
- }
+ // Rebuild sorted partition ranges after adding partitions
+ copy.rebuildSortedPartitionRanges();
HivePartitionValues partitionValuesCur =
partitionValuesCache.getIfPresent(key);
if (partitionValuesCur == partitionValues) {
partitionValuesCache.put(key, copy);
@@ -718,11 +677,6 @@ public class HiveMetaStoreCache {
HivePartitionValues copy = partitionValues.copy();
Map<String, Long> partitionNameToIdMapBefore =
copy.getPartitionNameToIdMap();
Map<Long, PartitionItem> idToPartitionItemBefore =
copy.getIdToPartitionItem();
- Map<Long, List<UniqueId>> idToUniqueIdsMapBefore =
copy.getIdToUniqueIdsMap();
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRangeBefore =
copy.getUidToPartitionRange();
- Map<Range<PartitionKey>, UniqueId> rangeToIdBefore =
copy.getRangeToId();
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMapBefore =
copy.getSingleColumnRangeMap();
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMapBefore =
copy.getSingleUidToColumnRangeMap();
Map<Long, List<String>> partitionValuesMap =
copy.getPartitionValuesMap();
for (String partitionName : partitionNames) {
if (!partitionNameToIdMapBefore.containsKey(partitionName)) {
@@ -733,27 +687,13 @@ public class HiveMetaStoreCache {
Long partitionId =
partitionNameToIdMapBefore.remove(partitionName);
idToPartitionItemBefore.remove(partitionId);
partitionValuesMap.remove(partitionId);
- List<UniqueId> uniqueIds =
idToUniqueIdsMapBefore.remove(partitionId);
- for (UniqueId uniqueId : uniqueIds) {
- if (uidToPartitionRangeBefore != null) {
- Range<PartitionKey> range =
uidToPartitionRangeBefore.remove(uniqueId);
- if (range != null) {
- rangeToIdBefore.remove(range);
- }
- }
-
- if (singleUidToColumnRangeMapBefore != null) {
- Range<ColumnBound> range =
singleUidToColumnRangeMapBefore.remove(uniqueId);
- if (range != null) {
- singleColumnRangeMapBefore.remove(range);
- }
- }
- }
if (invalidPartitionCache) {
invalidatePartitionCache(dorisTable, partitionName);
}
}
+ // Rebuild sorted partition ranges after dropping partitions
+ copy.rebuildSortedPartitionRanges();
HivePartitionValues partitionValuesCur =
partitionValuesCache.getIfPresent(key);
if (partitionValuesCur == partitionValues) {
partitionValuesCache.put(key, copy);
@@ -933,7 +873,7 @@ public class HiveMetaStoreCache {
return dummyKey == ((FileCacheKey) obj).dummyKey;
}
return location.equals(((FileCacheKey) obj).location)
- && Objects.equals(partitionValues, ((FileCacheKey)
obj).partitionValues);
+ && Objects.equals(partitionValues, ((FileCacheKey)
obj).partitionValues);
}
boolean isSameTable(long id) {
@@ -1031,54 +971,69 @@ public class HiveMetaStoreCache {
@Data
public static class HivePartitionValues {
private BiMap<String, Long> partitionNameToIdMap;
- private Map<Long, List<UniqueId>> idToUniqueIdsMap;
private Map<Long, PartitionItem> idToPartitionItem;
private Map<Long, List<String>> partitionValuesMap;
- //multi pair
- private Map<UniqueId, Range<PartitionKey>> uidToPartitionRange;
- private Map<Range<PartitionKey>, UniqueId> rangeToId;
- //single pair
- private RangeMap<ColumnBound, UniqueId> singleColumnRangeMap;
- private Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap;
+
+ // Sorted partition ranges for binary search filtering.
+ // Built at construction time, shares the same lifecycle with
HivePartitionValues.
+ private SortedPartitionRanges<String> sortedPartitionRanges;
public HivePartitionValues() {
}
public HivePartitionValues(Map<Long, PartitionItem> idToPartitionItem,
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange,
- Map<Range<PartitionKey>, UniqueId> rangeToId,
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap,
BiMap<String, Long> partitionNameToIdMap,
- Map<Long, List<UniqueId>> idToUniqueIdsMap,
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap,
Map<Long, List<String>> partitionValuesMap) {
this.idToPartitionItem = idToPartitionItem;
- this.uidToPartitionRange = uidToPartitionRange;
- this.rangeToId = rangeToId;
- this.singleColumnRangeMap = singleColumnRangeMap;
this.partitionNameToIdMap = partitionNameToIdMap;
- this.idToUniqueIdsMap = idToUniqueIdsMap;
- this.singleUidToColumnRangeMap = singleUidToColumnRangeMap;
this.partitionValuesMap = partitionValuesMap;
+ this.sortedPartitionRanges = buildSortedPartitionRanges();
}
+ /**
+ * Create a copy for incremental updates (add/drop partitions).
+ * The sortedPartitionRanges is not copied; the caller must call
rebuildSortedPartitionRanges() after modifying the partition data.
+ */
public HivePartitionValues copy() {
HivePartitionValues copy = new HivePartitionValues();
copy.setPartitionNameToIdMap(partitionNameToIdMap == null ? null :
HashBiMap.create(partitionNameToIdMap));
- copy.setIdToUniqueIdsMap(idToUniqueIdsMap == null ? null :
Maps.newHashMap(idToUniqueIdsMap));
copy.setIdToPartitionItem(idToPartitionItem == null ? null :
Maps.newHashMap(idToPartitionItem));
copy.setPartitionValuesMap(partitionValuesMap == null ? null :
Maps.newHashMap(partitionValuesMap));
- copy.setUidToPartitionRange(uidToPartitionRange == null ? null :
Maps.newHashMap(uidToPartitionRange));
- copy.setRangeToId(rangeToId == null ? null :
Maps.newHashMap(rangeToId));
- copy.setSingleUidToColumnRangeMap(
- singleUidToColumnRangeMap == null ? null :
Maps.newHashMap(singleUidToColumnRangeMap));
- if (singleColumnRangeMap != null) {
- RangeMap<ColumnBound, UniqueId> copySingleColumnRangeMap =
TreeRangeMap.create();
- copySingleColumnRangeMap.putAll(singleColumnRangeMap);
- copy.setSingleColumnRangeMap(copySingleColumnRangeMap);
- }
+ // sortedPartitionRanges is not copied here, caller should call
rebuildSortedPartitionRanges()
+ // after modifying partition data
return copy;
}
+
+ /**
+ * Rebuild sorted partition ranges after incremental updates.
+ * Should be called after add/drop partitions.
+ */
+ public void rebuildSortedPartitionRanges() {
+ this.sortedPartitionRanges = buildSortedPartitionRanges();
+ }
+
+ /**
+ * Get sorted partition ranges for binary search filtering.
+ */
+ public Optional<SortedPartitionRanges<String>>
getSortedPartitionRanges() {
+ return Optional.ofNullable(sortedPartitionRanges);
+ }
+
+ private SortedPartitionRanges<String> buildSortedPartitionRanges() {
+ if (partitionNameToIdMap == null ||
partitionNameToIdMap.isEmpty()) {
+ return null;
+ }
+
+ // Build name to partition item map for
SortedPartitionRanges.build
+ BiMap<Long, String> idToName = partitionNameToIdMap.inverse();
+ Map<String, PartitionItem> nameToPartitionItem =
Maps.newHashMapWithExpectedSize(idToPartitionItem.size());
+ for (Map.Entry<Long, PartitionItem> entry :
idToPartitionItem.entrySet()) {
+ String partitionName = idToName.get(entry.getKey());
+ nameToPartitionItem.put(partitionName, entry.getValue());
+ }
+
+ return SortedPartitionRanges.build(nameToPartitionItem);
+ }
}
/**
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
index 50d4cb3befa..0c4a1b034b5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SortedPartitionRanges.java
@@ -20,9 +20,11 @@ package org.apache.doris.nereids.rules.expression.rules;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.nereids.util.Utils;
+import com.google.common.collect.Lists;
import com.google.common.collect.Range;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
/** SortedPartitionRanges */
@@ -41,6 +43,48 @@ public class SortedPartitionRanges<K> {
);
}
+ /**
+ * Build SortedPartitionRanges from a partition map.
+ * This method extracts the common logic for building sorted partition
ranges
+ * from both NereidsSortedPartitionsCacheManager and HiveMetaStoreCache.
+ *
+ * @param partitionMap map of partition id to partition item
+ * @return SortedPartitionRanges, or null if the map is null or empty
+ */
+ public static <K> SortedPartitionRanges<K> build(Map<K, PartitionItem>
partitionMap) {
+ if (partitionMap == null || partitionMap.isEmpty()) {
+ return null;
+ }
+
+ List<PartitionItemAndRange<K>> sortedRanges =
Lists.newArrayListWithCapacity(partitionMap.size());
+ List<PartitionItemAndId<K>> defaultPartitions = Lists.newArrayList();
+
+ for (Map.Entry<K, PartitionItem> entry : partitionMap.entrySet()) {
+ PartitionItem partitionItem = entry.getValue();
+ K id = entry.getKey();
+ if (!partitionItem.isDefaultPartition()) {
+ List<Range<MultiColumnBound>> ranges =
PartitionItemToRange.toRanges(partitionItem);
+ for (Range<MultiColumnBound> range : ranges) {
+ sortedRanges.add(new PartitionItemAndRange<>(id,
partitionItem, range));
+ }
+ } else {
+ defaultPartitions.add(new PartitionItemAndId<>(id,
partitionItem));
+ }
+ }
+
+ sortedRanges.sort((o1, o2) -> {
+ Range<MultiColumnBound> span1 = o1.range;
+ Range<MultiColumnBound> span2 = o2.range;
+ int result =
span1.lowerEndpoint().compareTo(span2.lowerEndpoint());
+ if (result != 0) {
+ return result;
+ }
+ return span1.upperEndpoint().compareTo(span2.upperEndpoint());
+ });
+
+ return new SortedPartitionRanges<>(sortedRanges, defaultPartitions);
+ }
+
/** PartitionItemAndRange */
public static class PartitionItemAndRange<K> {
public final K id;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
index 456dde5c5bc..35a1105c916 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
@@ -17,11 +17,8 @@
package org.apache.doris.nereids.rules.rewrite;
-import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.PartitionItem;
-import org.apache.doris.catalog.SupportBinarySearchFilteringPartitions;
import org.apache.doris.common.Pair;
-import org.apache.doris.common.cache.NereidsSortedPartitionsCacheManager;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.Rule;
@@ -96,11 +93,10 @@ public class PruneFileScanPartition extends
OneRewriteRuleFactory {
Map<String, PartitionItem> nameToPartitionItem =
scan.getSelectedPartitions().selectedPartitions;
Optional<SortedPartitionRanges<String>> sortedPartitionRanges =
Optional.empty();
- if (externalTable instanceof SupportBinarySearchFilteringPartitions) {
- NereidsSortedPartitionsCacheManager partitionsCacheManager =
Env.getCurrentEnv()
- .getSortedPartitionsCacheManager();
- sortedPartitionRanges = (Optional) partitionsCacheManager.get(
- (SupportBinarySearchFilteringPartitions)
externalTable, scan);
+ boolean enableBinarySearch = ctx.getConnectContext() == null
+ ||
ctx.getConnectContext().getSessionVariable().enableBinarySearchFilteringPartitions;
+ if (enableBinarySearch) {
+ sortedPartitionRanges = (Optional)
externalTable.getSortedPartitionRanges(scan);
}
Pair<List<String>, Optional<Expression>> res = PartitionPruner.prune(
partitionSlots, filter.getPredicate(), nameToPartitionItem,
ctx,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]