nssalian commented on code in PR #16351:
URL: https://github.com/apache/iceberg/pull/16351#discussion_r3249646972


##########
core/src/test/java/org/apache/iceberg/util/TestFileSystemWalker.java:
##########
@@ -242,4 +247,109 @@ private void listFilesRecursively(
 
     assertThat(remainingDirs).isEmpty();
   }
+
+  @Test
+  public void testListDirRecursivelyWithFileIOBucketRootBaseDir() {
+    List<FileInfo> entries =
+        ImmutableList.of(
+            new FileInfo("s3://bucket/data/a.parquet", 1L, 0L),
+            new FileInfo("s3://bucket/data/b.parquet", 1L, 0L),
+            new FileInfo("s3://bucket/top.parquet", 1L, 0L));
+    SupportsPrefixOperations mockIO = new StaticPrefixFileIO(entries);
+
+    List<String> foundFiles = Lists.newArrayList();
+    Predicate<FileInfo> fileFilter = fileInfo -> 
fileInfo.location().endsWith(".parquet");
+
+    assertThatCode(
+            () ->
+                FileSystemWalker.listDirRecursivelyWithFileIO(
+                    mockIO, "s3://bucket/", null, fileFilter, foundFiles::add))
+        .doesNotThrowAnyException();
+
+    assertThat(foundFiles)
+        .containsExactlyInAnyOrder(
+            "s3://bucket/data/a.parquet", "s3://bucket/data/b.parquet", 
"s3://bucket/top.parquet");
+  }
+
+  /**
+   * Regression test: with a bucket-root base directory, files living under a 
hidden top-level
+   * directory (e.g. {@code _temporary}) must still be filtered out, and no 
NPE should occur.
+   */
+  @Test
+  public void testListDirRecursivelyWithFileIOBucketRootFiltersHiddenDir() {
+    List<FileInfo> entries =
+        ImmutableList.of(
+            new FileInfo("s3://bucket/data/a.parquet", 1L, 0L),
+            new FileInfo("s3://bucket/_temporary/attempt_x/b.parquet", 1L, 0L),
+            new FileInfo("s3://bucket/.staging/c.parquet", 1L, 0L));
+    SupportsPrefixOperations mockIO = new StaticPrefixFileIO(entries);
+
+    List<String> foundFiles = Lists.newArrayList();
+    Predicate<FileInfo> fileFilter = fileInfo -> 
fileInfo.location().endsWith(".parquet");
+
+    assertThatCode(
+            () ->
+                FileSystemWalker.listDirRecursivelyWithFileIO(
+                    mockIO, "s3://bucket/", null, fileFilter, foundFiles::add))
+        .doesNotThrowAnyException();
+
+    assertThat(foundFiles).containsExactly("s3://bucket/data/a.parquet");
+  }
+
+  /**
+   * Regression test: when the base directory is passed without a trailing 
slash (e.g. {@code
+   * oss://bucket}), the walk should still work correctly without NPE.
+   */
+  @Test
+  public void testListDirRecursivelyWithFileIOBucketRootNoTrailingSlash() {
+    List<FileInfo> entries =
+        ImmutableList.of(
+            new FileInfo("s3://bucket/data/a.parquet", 1L, 0L),
+            new FileInfo("s3://bucket/_hidden/b.parquet", 1L, 0L));
+    SupportsPrefixOperations mockIO = new StaticPrefixFileIO(entries);
+
+    List<String> foundFiles = Lists.newArrayList();
+    Predicate<FileInfo> fileFilter = fileInfo -> 
fileInfo.location().endsWith(".parquet");
+
+    assertThatCode(
+            () ->
+                FileSystemWalker.listDirRecursivelyWithFileIO(
+                    mockIO, "s3://bucket", null, fileFilter, foundFiles::add))
+        .doesNotThrowAnyException();
+
+    assertThat(foundFiles).containsExactly("s3://bucket/data/a.parquet");
+  }
+
+  private static class StaticPrefixFileIO implements SupportsPrefixOperations {

Review Comment:
   I've seen this alert as well, but I don't think we should do a one-off java 
record usage here. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to