lokeshj1703 commented on code in PR #12558:
URL: https://github.com/apache/hudi/pull/12558#discussion_r1907314452
##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -2423,18 +2427,53 @@ public HoodieRecord next() {
private static Stream<HoodieRecord> collectAndProcessColumnMetadata(
List<List<HoodieColumnRangeMetadata<Comparable>>> fileColumnMetadata,
String partitionPath, boolean isTightBound) {
+ return collectAndProcessColumnMetadata(partitionPath, isTightBound,
Option.empty(), fileColumnMetadata.stream().flatMap(List::stream));
+ }
- // Step 1: Flatten and Group by Column Name
- Map<String, List<HoodieColumnRangeMetadata<Comparable>>> columnMetadataMap
= fileColumnMetadata.stream()
- .flatMap(List::stream)
-
.collect(Collectors.groupingBy(HoodieColumnRangeMetadata::getColumnName,
Collectors.toList()));
+ private static Stream<HoodieRecord>
collectAndProcessColumnMetadata(Iterable<HoodieColumnRangeMetadata<Comparable>>
fileColumnMetadataIterable, String partitionPath,
+ boolean
isTightBound, Option<String> indexPartitionOpt) {
- // Step 2: Aggregate Column Ranges
+ List<HoodieColumnRangeMetadata<Comparable>> fileColumnMetadata = new
ArrayList<>();
+ fileColumnMetadataIterable.forEach(fileColumnMetadata::add);
+ // Group by Column Name
+ return collectAndProcessColumnMetadata(partitionPath, isTightBound,
indexPartitionOpt, fileColumnMetadata.stream());
+ }
+
+ private static Stream<HoodieRecord> collectAndProcessColumnMetadata(String
partitionPath, boolean isTightBound, Option<String> indexPartitionOpt,
+
Stream<HoodieColumnRangeMetadata<Comparable>> fileColumnMetadata) {
+ // Group by Column Name
+ Map<String, List<HoodieColumnRangeMetadata<Comparable>>> columnMetadataMap
=
+
fileColumnMetadata.collect(Collectors.groupingBy(HoodieColumnRangeMetadata::getColumnName,
Collectors.toList()));
+
+ // Aggregate Column Ranges
Stream<HoodieColumnRangeMetadata<Comparable>> partitionStatsRangeMetadata
= columnMetadataMap.entrySet().stream()
- .map(entry ->
FileFormatUtils.getColumnRangeInPartition(partitionPath, entry.getValue()));
+ .map(entry -> FileFormatUtils.getColumnRangeInPartition(partitionPath,
entry.getValue()));
// Create Partition Stats Records
- return HoodieMetadataPayload.createPartitionStatsRecords(partitionPath,
partitionStatsRangeMetadata.collect(Collectors.toList()), false, isTightBound);
+ return HoodieMetadataPayload.createPartitionStatsRecords(partitionPath,
partitionStatsRangeMetadata.collect(Collectors.toList()), false, isTightBound,
indexPartitionOpt);
+ }
+
+ public static HoodieData<HoodieRecord>
collectAndProcessEIPartitionStatRecords(
Review Comment:
Renamed
##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -397,7 +399,9 @@ public static Map<String, HoodieData<HoodieRecord>>
convertMetadataToRecords(Hoo
if (enabledPartitionTypes.contains(MetadataPartitionType.PARTITION_STATS))
{
checkState(MetadataPartitionType.COLUMN_STATS.isMetadataPartitionAvailable(dataMetaClient),
"Column stats partition must be enabled to generate partition stats.
Please enable: " +
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key());
- final HoodieData<HoodieRecord> partitionStatsRDD =
convertMetadataToPartitionStatsRecords(commitMetadata, context, dataMetaClient,
metadataConfig);
+ HoodiePairData<String,
List<List<HoodieColumnRangeMetadata<Comparable>>>> partitionRangeMetadata =
convertMetadataToPartitionStatsColumnRangeMetadata(commitMetadata, context,
Review Comment:
Addressed
##########
hudi-common/src/main/java/org/apache/hudi/index/expression/HoodieExpressionIndex.java:
##########
@@ -35,6 +35,8 @@ public interface HoodieExpressionIndex<S, T> extends
Serializable {
String HOODIE_EXPRESSION_INDEX_PARTITION =
"_hoodie_expression_index_partition";
String HOODIE_EXPRESSION_INDEX_FILE_SIZE =
"_hoodie_expression_index_file_size";
+ String HOODIE_EXPRESSION_INDEX_PARTITION_STAT_PREFIX = "_partition_stat_";
Review Comment:
Addressed
##########
hudi-common/src/main/java/org/apache/hudi/common/data/HoodieListPairData.java:
##########
@@ -191,6 +192,11 @@ public <W> HoodiePairData<K, Pair<V, Option<W>>>
leftOuterJoin(HoodiePairData<K,
return new HoodieListPairData<>(leftOuterJoined, lazy);
}
+ @Override
+ public HoodiePairData<K, V> union(HoodiePairData<K, V> other) {
+ throw new UnsupportedOperationException("Operation is not supported yet");
Review Comment:
Makes sense. Used it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org