voonhous commented on code in PR #18837:
URL: https://github.com/apache/hudi/pull/18837#discussion_r3340351168


##########
hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/stats/TableMetadataReader.java:
##########
@@ -55,60 +48,29 @@ public class TableMetadataReader
      * @return a map from column name to their corresponding {@link 
HoodieColumnRangeMetadata}
      * @throws HoodieMetadataException if an error occurs while fetching the 
column statistics
      */
-    Map<String, HoodieColumnRangeMetadata> getColumnStats(List<Pair<String, 
String>> partitionNameFileNameList, List<String> columnNames)
+    public Map<String, HoodieColumnRangeMetadata> 
getColumnsRange(List<Pair<String, String>> partitionNameFileNameList, 
List<String> columnNames)
             throws HoodieMetadataException
     {
-        return 
computeFileToColumnStatsMap(computeColumnStatsLookupKeys(partitionNameFileNameList,
 columnNames));
-    }
-
-    /**
-     * @param partitionNameFileNameList a list of partition and file name 
pairs for which column stats need to be retrieved
-     * @param columnNames list of column names for which stats are needed
-     * @return a list of column stats keys to look up in the metadata table 
col_stats partition.
-     */
-    private List<String> computeColumnStatsLookupKeys(
-            final List<Pair<String, String>> partitionNameFileNameList,
-            final List<String> columnNames)
-    {
-        return columnNames.stream()
-                .flatMap(columnName -> partitionNameFileNameList.stream()
-                        .map(partitionNameFileNamePair -> 
HoodieMetadataPayload.getColumnStatsIndexKey(
-                                new 
PartitionIndexID(HoodieTableMetadataUtil.getColumnStatsIndexPartitionIdentifier(partitionNameFileNamePair.getLeft())),
-                                new 
FileIndexID(partitionNameFileNamePair.getRight()),
-                                new ColumnIndexID(columnName))))
-                .toList();
-    }
-
-    /**
-     * @param columnStatsLookupKeys a map from column stats key to partition 
and file name pair
-     * @return a map from column name to merged HoodieMetadataColumnStats
-     */
-    private Map<String, HoodieColumnRangeMetadata> 
computeFileToColumnStatsMap(List<String> columnStatsLookupKeys)
-    {
-        HoodieTimer timer = HoodieTimer.start();
-        Map<String, HoodieRecord<HoodieMetadataPayload>> hoodieRecords =
-                getRecordsByKeys(columnStatsLookupKeys, 
MetadataPartitionType.COLUMN_STATS.getPartitionPath());
-        metrics.ifPresent(m -> 
m.updateMetrics(HoodieMetadataMetrics.LOOKUP_COLUMN_STATS_METADATA_STR, 
timer.endTimer()));
-        return hoodieRecords.values().stream()
-                .collect(Collectors.groupingBy(
-                        r -> 
r.getData().getColumnStatMetadata().get().getColumnName(),
-                        Collectors.mapping(r -> 
r.getData().getColumnStatMetadata().get(), Collectors.toList())))
+        Map<Pair<String, String>, List<HoodieMetadataColumnStats>> 
columnStatsMap = getColumnStats(partitionNameFileNameList, columnNames);
+        return 
columnStatsMap.values().stream().flatMap(Collection::stream).collect(Collectors.groupingBy(
+                HoodieMetadataColumnStats::getColumnName,
+                Collectors.mapping(colStats -> colStats, Collectors.toList())))

Review Comment:
   Addressed in a6d2cd7. Replaced `Collectors.mapping(colStats -> colStats, 
Collectors.toList())` with `Collectors.toList()` as the `groupingBy` downstream 
collector.



##########
hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/stats/TableMetadataReader.java:
##########
@@ -55,60 +48,29 @@ public class TableMetadataReader
      * @return a map from column name to their corresponding {@link 
HoodieColumnRangeMetadata}
      * @throws HoodieMetadataException if an error occurs while fetching the 
column statistics
      */
-    Map<String, HoodieColumnRangeMetadata> getColumnStats(List<Pair<String, 
String>> partitionNameFileNameList, List<String> columnNames)
+    public Map<String, HoodieColumnRangeMetadata> 
getColumnsRange(List<Pair<String, String>> partitionNameFileNameList, 
List<String> columnNames)
             throws HoodieMetadataException
     {
-        return 
computeFileToColumnStatsMap(computeColumnStatsLookupKeys(partitionNameFileNameList,
 columnNames));
-    }
-
-    /**
-     * @param partitionNameFileNameList a list of partition and file name 
pairs for which column stats need to be retrieved
-     * @param columnNames list of column names for which stats are needed
-     * @return a list of column stats keys to look up in the metadata table 
col_stats partition.
-     */
-    private List<String> computeColumnStatsLookupKeys(
-            final List<Pair<String, String>> partitionNameFileNameList,
-            final List<String> columnNames)
-    {
-        return columnNames.stream()
-                .flatMap(columnName -> partitionNameFileNameList.stream()
-                        .map(partitionNameFileNamePair -> 
HoodieMetadataPayload.getColumnStatsIndexKey(
-                                new 
PartitionIndexID(HoodieTableMetadataUtil.getColumnStatsIndexPartitionIdentifier(partitionNameFileNamePair.getLeft())),
-                                new 
FileIndexID(partitionNameFileNamePair.getRight()),
-                                new ColumnIndexID(columnName))))
-                .toList();
-    }
-
-    /**
-     * @param columnStatsLookupKeys a map from column stats key to partition 
and file name pair
-     * @return a map from column name to merged HoodieMetadataColumnStats
-     */
-    private Map<String, HoodieColumnRangeMetadata> 
computeFileToColumnStatsMap(List<String> columnStatsLookupKeys)
-    {
-        HoodieTimer timer = HoodieTimer.start();
-        Map<String, HoodieRecord<HoodieMetadataPayload>> hoodieRecords =
-                getRecordsByKeys(columnStatsLookupKeys, 
MetadataPartitionType.COLUMN_STATS.getPartitionPath());
-        metrics.ifPresent(m -> 
m.updateMetrics(HoodieMetadataMetrics.LOOKUP_COLUMN_STATS_METADATA_STR, 
timer.endTimer()));
-        return hoodieRecords.values().stream()
-                .collect(Collectors.groupingBy(
-                        r -> 
r.getData().getColumnStatMetadata().get().getColumnName(),
-                        Collectors.mapping(r -> 
r.getData().getColumnStatMetadata().get(), Collectors.toList())))
+        Map<Pair<String, String>, List<HoodieMetadataColumnStats>> 
columnStatsMap = getColumnStats(partitionNameFileNameList, columnNames);
+        return 
columnStatsMap.values().stream().flatMap(Collection::stream).collect(Collectors.groupingBy(
+                HoodieMetadataColumnStats::getColumnName,
+                Collectors.mapping(colStats -> colStats, Collectors.toList())))

Review Comment:
   Addressed in a6d2cd7. Replaced `Collectors.mapping(colStats -> colStats, 
Collectors.toList())` with `Collectors.toList()` as the `groupingBy` downstream 
collector.



##########
hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/stats/TableMetadataReader.java:
##########
@@ -55,60 +48,29 @@ public class TableMetadataReader
      * @return a map from column name to their corresponding {@link 
HoodieColumnRangeMetadata}
      * @throws HoodieMetadataException if an error occurs while fetching the 
column statistics
      */
-    Map<String, HoodieColumnRangeMetadata> getColumnStats(List<Pair<String, 
String>> partitionNameFileNameList, List<String> columnNames)
+    public Map<String, HoodieColumnRangeMetadata> 
getColumnsRange(List<Pair<String, String>> partitionNameFileNameList, 
List<String> columnNames)
             throws HoodieMetadataException
     {
-        return 
computeFileToColumnStatsMap(computeColumnStatsLookupKeys(partitionNameFileNameList,
 columnNames));
-    }
-
-    /**
-     * @param partitionNameFileNameList a list of partition and file name 
pairs for which column stats need to be retrieved
-     * @param columnNames list of column names for which stats are needed
-     * @return a list of column stats keys to look up in the metadata table 
col_stats partition.
-     */
-    private List<String> computeColumnStatsLookupKeys(
-            final List<Pair<String, String>> partitionNameFileNameList,
-            final List<String> columnNames)
-    {
-        return columnNames.stream()
-                .flatMap(columnName -> partitionNameFileNameList.stream()
-                        .map(partitionNameFileNamePair -> 
HoodieMetadataPayload.getColumnStatsIndexKey(
-                                new 
PartitionIndexID(HoodieTableMetadataUtil.getColumnStatsIndexPartitionIdentifier(partitionNameFileNamePair.getLeft())),
-                                new 
FileIndexID(partitionNameFileNamePair.getRight()),
-                                new ColumnIndexID(columnName))))
-                .toList();
-    }
-
-    /**
-     * @param columnStatsLookupKeys a map from column stats key to partition 
and file name pair
-     * @return a map from column name to merged HoodieMetadataColumnStats
-     */
-    private Map<String, HoodieColumnRangeMetadata> 
computeFileToColumnStatsMap(List<String> columnStatsLookupKeys)
-    {
-        HoodieTimer timer = HoodieTimer.start();
-        Map<String, HoodieRecord<HoodieMetadataPayload>> hoodieRecords =
-                getRecordsByKeys(columnStatsLookupKeys, 
MetadataPartitionType.COLUMN_STATS.getPartitionPath());
-        metrics.ifPresent(m -> 
m.updateMetrics(HoodieMetadataMetrics.LOOKUP_COLUMN_STATS_METADATA_STR, 
timer.endTimer()));
-        return hoodieRecords.values().stream()
-                .collect(Collectors.groupingBy(
-                        r -> 
r.getData().getColumnStatMetadata().get().getColumnName(),
-                        Collectors.mapping(r -> 
r.getData().getColumnStatMetadata().get(), Collectors.toList())))
+        Map<Pair<String, String>, List<HoodieMetadataColumnStats>> 
columnStatsMap = getColumnStats(partitionNameFileNameList, columnNames);
+        return 
columnStatsMap.values().stream().flatMap(Collection::stream).collect(Collectors.groupingBy(
+                HoodieMetadataColumnStats::getColumnName,
+                Collectors.mapping(colStats -> colStats, Collectors.toList())))

Review Comment:
   Addressed in a6d2cd7. Replaced `Collectors.mapping(colStats -> colStats, 
Collectors.toList())` with `Collectors.toList()` as the `groupingBy` downstream 
collector.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to