YuweiXiao commented on code in PR #6680:
URL: https://github.com/apache/hudi/pull/6680#discussion_r1000200644
##########
hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java:
##########
@@ -310,35 +256,159 @@ private void doRefresh() {
)
);
- cachedFileSize = cachedAllInputFileSlices.values().stream()
+ this.cachedFileSize += ret.values().stream()
.flatMap(Collection::stream)
.mapToLong(BaseHoodieTableFileIndex::fileSliceSize)
.sum();
- // If the partition value contains InternalRow.empty, we query it as a
non-partitioned table.
- queryAsNonePartitionedTable = partitionFiles.keySet().stream().anyMatch(p
-> p.values.length == 0);
+ return ret;
+ }
- long duration = System.currentTimeMillis() - startTime;
+ /**
+ * Get partition path with the given partition value
+ * @param partitionNames partition names
+ * @param values partition values
+ * @return partitions that match the given partition values
+ */
+ protected List<PartitionPath> getPartitionPaths(String[] partitionNames,
String[] values) {
+ if (cachedAllPartitionPaths != null) {
+ LOG.info("All partition paths have already loaded, use it directly");
+ return cachedAllPartitionPaths;
+ }
- LOG.info(String.format("Refresh table %s, spent: %d ms",
metaClient.getTableConfig().getTableName(), duration));
+ Pair<String, Boolean> relativeQueryPartitionPathPair =
composeRelativePartitionPaths(partitionNames, values);
+ // If the composed partition path is complete, we return it directly, to
save extra DFS listing operations.
+ if (relativeQueryPartitionPathPair.getRight()) {
+ return Collections.singletonList(new
PartitionPath(relativeQueryPartitionPathPair.getLeft(),
+ parsePartitionColumnValues(partitionColumns,
relativeQueryPartitionPathPair.getLeft())));
+ }
+ // The input partition values (from the query predicate) form a prefix of
the partition path, so list only that path.
+ return
listPartitionPaths(Collections.singletonList(relativeQueryPartitionPathPair.getLeft()));
}
- private Map<String, FileStatus[]>
getAllFilesInPartitionsUnchecked(Collection<String>
fullPartitionPathsMapToFetch) {
- try {
- return tableMetadata.getAllFilesInPartitions(new
ArrayList<>(fullPartitionPathsMapToFetch));
- } catch (IOException e) {
- throw new HoodieIOException("Failed to list partition paths for a
table", e);
+ /**
+ * Construct relative partition (i.e., partition prefix) from the given
partition values
Review Comment:
Fixed.
##########
hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java:
##########
@@ -310,35 +256,159 @@ private void doRefresh() {
)
);
- cachedFileSize = cachedAllInputFileSlices.values().stream()
+ this.cachedFileSize += ret.values().stream()
.flatMap(Collection::stream)
.mapToLong(BaseHoodieTableFileIndex::fileSliceSize)
.sum();
- // If the partition value contains InternalRow.empty, we query it as a
non-partitioned table.
- queryAsNonePartitionedTable = partitionFiles.keySet().stream().anyMatch(p
-> p.values.length == 0);
+ return ret;
+ }
- long duration = System.currentTimeMillis() - startTime;
+ /**
+ * Get partition path with the given partition value
+ * @param partitionNames partition names
+ * @param values partition values
+ * @return partitions that match the given partition values
+ */
+ protected List<PartitionPath> getPartitionPaths(String[] partitionNames,
String[] values) {
Review Comment:
Yeah, sure!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]