This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 886ecab4d6 Core: Fix NPE in RemoveOrphanFiles with prefix_listing for 
root table location(#16351)
886ecab4d6 is described below

commit 886ecab4d6f72415fac69e1ad55f0bab9c25d18d
Author: liuliquan-marshal <[email protected]>
AuthorDate: Mon May 18 18:12:26 2026 +0800

    Core: Fix NPE in RemoveOrphanFiles with prefix_listing for root table 
location(#16351)
---
 .../org/apache/iceberg/util/FileSystemWalker.java  | 15 ++--
 .../apache/iceberg/util/TestFileSystemWalker.java  | 99 ++++++++++++++++++++++
 2 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/FileSystemWalker.java 
b/core/src/main/java/org/apache/iceberg/util/FileSystemWalker.java
index 9cd4906952..479c8e221e 100644
--- a/core/src/main/java/org/apache/iceberg/util/FileSystemWalker.java
+++ b/core/src/main/java/org/apache/iceberg/util/FileSystemWalker.java
@@ -160,18 +160,21 @@ public class FileSystemWalker {
    * @return {@code true} if the path is hidden, {@code false} otherwise
    */
   private static boolean isHiddenPath(String baseDir, Path path, PathFilter 
pathFilter) {
-    boolean isHiddenPath = false;
     Path currentPath = path;
-    while (currentPath.getParent().toString().contains(baseDir)) {
+    Path parent = currentPath.getParent();
+    // Walk up the path hierarchy while the parent directory is still within 
baseDir.
+    // Null-check the parent to avoid NPE when the walk reaches the storage 
root
+    // (e.g., an S3 bucket root such as "s3://bucket/"), whose getParent() 
returns null.
+    while (parent != null && parent.toString().contains(baseDir)) {
       if (!pathFilter.accept(currentPath)) {
-        isHiddenPath = true;
-        break;
+        return true;
       }
 
-      currentPath = currentPath.getParent();
+      currentPath = parent;
+      parent = currentPath.getParent();
     }
 
-    return isHiddenPath;
+    return false;
   }
 
   /**
diff --git 
a/core/src/test/java/org/apache/iceberg/util/TestFileSystemWalker.java 
b/core/src/test/java/org/apache/iceberg/util/TestFileSystemWalker.java
index a748a46215..ba03c073c6 100644
--- a/core/src/test/java/org/apache/iceberg/util/TestFileSystemWalker.java
+++ b/core/src/test/java/org/apache/iceberg/util/TestFileSystemWalker.java
@@ -19,6 +19,7 @@
 package org.apache.iceberg.util;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import java.io.IOException;
 import java.nio.file.Files;
@@ -33,6 +34,10 @@ import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.hadoop.HadoopFileIO;
 import org.apache.iceberg.io.FileInfo;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.io.OutputFile;
+import org.apache.iceberg.io.SupportsPrefixOperations;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
@@ -242,4 +247,98 @@ public class TestFileSystemWalker {
 
     assertThat(remainingDirs).isEmpty();
   }
+
+  @Test
+  public void testListDirRecursivelyWithFileIOBucketRootBaseDir() {
+    assertThat(
+            listWithMockFileIO(
+                "s3://bucket/",
+                new FileInfo("s3://bucket/data/a.parquet", 1L, 0L),
+                new FileInfo("s3://bucket/data/b.parquet", 1L, 0L),
+                new FileInfo("s3://bucket/top.parquet", 1L, 0L)))
+        .containsExactlyInAnyOrder(
+            "s3://bucket/data/a.parquet", "s3://bucket/data/b.parquet", 
"s3://bucket/top.parquet");
+  }
+
+  /**
+   * Regression test: with a bucket-root base directory, files living under a 
hidden top-level
+   * directory (e.g. {@code _temporary}) must still be filtered out, and no 
NPE should occur.
+   */
+  @Test
+  public void testListDirRecursivelyWithFileIOBucketRootFiltersHiddenDir() {
+    assertThat(
+            listWithMockFileIO(
+                "s3://bucket/",
+                new FileInfo("s3://bucket/data/a.parquet", 1L, 0L),
+                new FileInfo("s3://bucket/_temporary/attempt_x/b.parquet", 1L, 
0L),
+                new FileInfo("s3://bucket/.staging/c.parquet", 1L, 0L)))
+        .containsExactly("s3://bucket/data/a.parquet");
+  }
+
+  /**
+   * Regression test: when the base directory is passed without a trailing 
slash (e.g. {@code
+   * oss://bucket}), the walk should still work correctly without NPE.
+   */
+  @Test
+  public void testListDirRecursivelyWithFileIOBucketRootNoTrailingSlash() {
+    assertThat(
+            listWithMockFileIO(
+                "s3://bucket",
+                new FileInfo("s3://bucket/data/a.parquet", 1L, 0L),
+                new FileInfo("s3://bucket/_hidden/b.parquet", 1L, 0L)))
+        .containsExactly("s3://bucket/data/a.parquet");
+  }
+
+  @Test
+  public void testListDirRecursivelyWithFileIONullFileLocation() {
+    assertThatThrownBy(
+            () ->
+                listWithMockFileIO(
+                    "s3://bucket/",
+                    new FileInfo(null, 1L, 0L),
+                    new FileInfo("s3://bucket/data/a.parquet", 1L, 0L)))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("Can not create a Path from a null string");
+  }
+
+  private static List<String> listWithMockFileIO(String baseDir, FileInfo... 
entries) {
+    SupportsPrefixOperations mockIO = new 
StaticPrefixFileIO(ImmutableList.copyOf(entries));
+    List<String> foundFiles = Lists.newArrayList();
+    FileSystemWalker.listDirRecursivelyWithFileIO(
+        mockIO, baseDir, null, fileInfo -> true, foundFiles::add);
+    return foundFiles;
+  }
+
+  private static class StaticPrefixFileIO implements SupportsPrefixOperations {
+    private final Iterable<FileInfo> entries;
+
+    StaticPrefixFileIO(Iterable<FileInfo> entries) {
+      this.entries = entries;
+    }
+
+    @Override
+    public Iterable<FileInfo> listPrefix(String prefix) {
+      return entries;
+    }
+
+    @Override
+    public void deletePrefix(String prefix) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public InputFile newInputFile(String path) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public OutputFile newOutputFile(String path) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void deleteFile(String path) {
+      throw new UnsupportedOperationException();
+    }
+  }
 }

Reply via email to