pvary commented on code in PR #16595:
URL: https://github.com/apache/iceberg/pull/16595#discussion_r3324410788
##########
core/src/test/java/org/apache/iceberg/util/TestFileSystemWalker.java:
##########
@@ -301,6 +301,41 @@ public void testListDirRecursivelyWithFileIONullFileLocation() {
.hasMessageContaining("Can not create a Path from a null string");
}
+ /**
+ * Regression test: when the base directory is passed without a trailing slash, the Hadoop listing
+ * should still work correctly and not include files from sibling directories with similar
+ * prefixes.
+ */
+ @Test
+ public void testListDirRecursivelyWithHadoopNoTrailingSlash() {
+ List<String> foundFiles = Lists.newArrayList();
+ List<String> remainingDirs = Lists.newArrayList();
+ // Use path without trailing slash - should NOT match normal_dir_1
+ String pathWithoutSlash = basePath + "/normal_dir";
+
+ Predicate<FileStatus> fileFilter =
+ fileStatus -> fileStatus.getPath().getName().endsWith(".txt");
+ FileSystemWalker.listDirRecursivelyWithHadoop(
+ pathWithoutSlash,
+ specs,
+ fileFilter,
+ hadoopConf,
+ Integer.MAX_VALUE,
+ Integer.MAX_VALUE,
+ remainingDirs::add,
+ foundFiles::add);
+
+ assertThat(foundFiles).hasSize(2);
+ assertThat(foundFiles)
+ .contains(Paths.get("file://", basePath,
"normal_dir/file2.txt").toString());
+ assertThat(foundFiles)
+ .contains(Paths.get("file://", basePath,
"normal_dir/dep1/file3.txt").toString());
+ // Should NOT contain files from normal_dir_1 (sibling directory with similar prefix)
+ assertThat(foundFiles)
+ .doesNotContain(Paths.get("file://", basePath,
"normal_dir_1/file4.txt").toString());
Review Comment:
Can we actually create a test which validates this fix?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]