924060929 commented on code in PR #58877:
URL: https://github.com/apache/doris/pull/58877#discussion_r2605099037
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java:
##########
@@ -1031,54 +976,90 @@ public static class HiveFileStatus {
@Data
public static class HivePartitionValues {
private BiMap<String, Long> partitionNameToIdMap;
- private Map<Long, List<UniqueId>> idToUniqueIdsMap;
private Map<Long, PartitionItem> idToPartitionItem;
private Map<Long, List<String>> partitionValuesMap;
- //multi pair
- private Map<UniqueId, Range<PartitionKey>> uidToPartitionRange;
- private Map<Range<PartitionKey>, UniqueId> rangeToId;
- //single pair
- private RangeMap<ColumnBound, UniqueId> singleColumnRangeMap;
- private Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap;
+
+ // Sorted partition ranges for binary search filtering.
+ // Built at construction time, shares the same lifecycle with HivePartitionValues.
+ private SortedPartitionRanges<String> sortedPartitionRanges;
public HivePartitionValues() {
}
public HivePartitionValues(Map<Long, PartitionItem> idToPartitionItem,
- Map<UniqueId, Range<PartitionKey>> uidToPartitionRange,
- Map<Range<PartitionKey>, UniqueId> rangeToId,
- RangeMap<ColumnBound, UniqueId> singleColumnRangeMap,
BiMap<String, Long> partitionNameToIdMap,
- Map<Long, List<UniqueId>> idToUniqueIdsMap,
- Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap,
Map<Long, List<String>> partitionValuesMap) {
this.idToPartitionItem = idToPartitionItem;
- this.uidToPartitionRange = uidToPartitionRange;
- this.rangeToId = rangeToId;
- this.singleColumnRangeMap = singleColumnRangeMap;
this.partitionNameToIdMap = partitionNameToIdMap;
- this.idToUniqueIdsMap = idToUniqueIdsMap;
- this.singleUidToColumnRangeMap = singleUidToColumnRangeMap;
this.partitionValuesMap = partitionValuesMap;
+ this.sortedPartitionRanges = buildSortedPartitionRanges();
}
+ /**
+ * Create a copy for incremental updates (add/drop partitions).
+ * The sortedPartitionRanges will be rebuilt after the caller modifies the partition data.
+ */
public HivePartitionValues copy() {
HivePartitionValues copy = new HivePartitionValues();
copy.setPartitionNameToIdMap(partitionNameToIdMap == null ? null :
HashBiMap.create(partitionNameToIdMap));
- copy.setIdToUniqueIdsMap(idToUniqueIdsMap == null ? null :
Maps.newHashMap(idToUniqueIdsMap));
copy.setIdToPartitionItem(idToPartitionItem == null ? null :
Maps.newHashMap(idToPartitionItem));
copy.setPartitionValuesMap(partitionValuesMap == null ? null :
Maps.newHashMap(partitionValuesMap));
- copy.setUidToPartitionRange(uidToPartitionRange == null ? null :
Maps.newHashMap(uidToPartitionRange));
- copy.setRangeToId(rangeToId == null ? null :
Maps.newHashMap(rangeToId));
- copy.setSingleUidToColumnRangeMap(
- singleUidToColumnRangeMap == null ? null :
Maps.newHashMap(singleUidToColumnRangeMap));
- if (singleColumnRangeMap != null) {
- RangeMap<ColumnBound, UniqueId> copySingleColumnRangeMap =
TreeRangeMap.create();
- copySingleColumnRangeMap.putAll(singleColumnRangeMap);
- copy.setSingleColumnRangeMap(copySingleColumnRangeMap);
- }
+ // sortedPartitionRanges is not copied here, caller should call rebuildSortedPartitionRanges()
+ // after modifying partition data
return copy;
}
+
+ /**
+ * Rebuild sorted partition ranges after incremental updates.
+ * Should be called after add/drop partitions.
+ */
+ public void rebuildSortedPartitionRanges() {
+ this.sortedPartitionRanges = buildSortedPartitionRanges();
+ }
+
+ /**
+ * Get sorted partition ranges for binary search filtering.
+ */
+ public Optional<SortedPartitionRanges<String>>
getSortedPartitionRanges() {
+ return Optional.ofNullable(sortedPartitionRanges);
+ }
+
+ private SortedPartitionRanges<String> buildSortedPartitionRanges() {
+ if (partitionNameToIdMap == null ||
partitionNameToIdMap.isEmpty()) {
+ return null;
+ }
+
+ BiMap<Long, String> idToName = partitionNameToIdMap.inverse();
+ List<PartitionItemAndRange<String>> sortedRanges =
Lists.newArrayListWithCapacity(
+ idToPartitionItem.size());
+ List<PartitionItemAndId<String>> defaultPartitions =
Lists.newArrayList();
+
+ for (Map.Entry<Long, PartitionItem> entry :
idToPartitionItem.entrySet()) {
+ PartitionItem partitionItem = entry.getValue();
+ String partitionName = idToName.get(entry.getKey());
+ if (!partitionItem.isDefaultPartition()) {
+ List<Range<MultiColumnBound>> ranges =
PartitionItemToRange.toRanges(partitionItem);
+ for (Range<MultiColumnBound> range : ranges) {
+ sortedRanges.add(new
PartitionItemAndRange<>(partitionName, partitionItem, range));
+ }
+ } else {
+ defaultPartitions.add(new
PartitionItemAndId<>(partitionName, partitionItem));
+ }
+ }
+
+ // Sort by range bounds
+ sortedRanges.sort((o1, o2) -> {
+ Range<MultiColumnBound> span1 = o1.range;
+ Range<MultiColumnBound> span2 = o2.range;
+ int result =
span1.lowerEndpoint().compareTo(span2.lowerEndpoint());
+ if (result != 0) {
+ return result;
+ }
+ return span1.upperEndpoint().compareTo(span2.upperEndpoint());
+ });
+
+ return new SortedPartitionRanges<>(sortedRanges,
defaultPartitions);
+ }
Review Comment:
I think you should extract the corresponding code from
`NereidsSortedPartitionsCacheManager.loadCache` into a shared utility
function and reuse that function here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]