n3nash commented on a change in pull request #698: HUDI-135 - Skip Meta folder
when looking for partitions
URL: https://github.com/apache/incubator-hudi/pull/698#discussion_r288332213
##########
File path: hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java
##########
@@ -201,31 +203,53 @@ public static String getRelativePartitionPath(Path
basePath, Path partitionPath)
partitions.add(getRelativePartitionPath(basePath,
filePath.getParent()));
}
return true;
- });
+ }, true);
return partitions;
}
public static final List<String> getAllDataFilesForMarkers(FileSystem fs,
String basePath, String instantTs,
String markerDir) throws IOException {
List<String> dataFiles = new LinkedList<>();
- FSUtils.processFiles(fs, markerDir, (status) -> {
+ processFiles(fs, markerDir, (status) -> {
String pathStr = status.getPath().toString();
if (pathStr.endsWith(MARKER_EXTN)) {
dataFiles.add(FSUtils.translateMarkerToDataPath(basePath, pathStr,
instantTs));
}
return true;
- });
+ }, false);
return dataFiles;
}
- private static final void processFiles(FileSystem fs, String basePathStr,
- Function<LocatedFileStatus, Boolean> consumer) throws IOException {
- RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(new
Path(basePathStr), true);
- while (allFiles.hasNext()) {
- LocatedFileStatus status = allFiles.next();
- boolean success = consumer.apply(status);
- if (!success) {
- throw new HoodieException("Failed to process file-status=" + status);
+ /**
+ * Recursively processes all files in the base-path. If excludeMetaFolder is
set, the meta-folder and all its
+ * subdirs are skipped
+ * @param fs File System
+ * @param basePathStr Base-Path
+ * @param consumer Callback for processing
+ * @param excludeMetaFolder Exclude .hoodie folder
+ * @throws IOException
+ */
+ @VisibleForTesting
+ static void processFiles(FileSystem fs, String basePathStr,
+ Function<FileStatus, Boolean> consumer, boolean excludeMetaFolder)
throws IOException {
+ PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() :
ALLOW_ALL_FILTER;
+ FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
Review comment:
Can we not use globStatus here, passing in the filter?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services