This is an automated email from the ASF dual-hosted git repository.
junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new aa92afaec [core] Add listDirectories to FileIO (#3205)
aa92afaec is described below
commit aa92afaecfa6dd7dba57c130efe9262f75344b5e
Author: Jingsong Lee <[email protected]>
AuthorDate: Fri Apr 12 18:09:59 2024 +0800
[core] Add listDirectories to FileIO (#3205)
---
.../src/main/java/org/apache/paimon/fs/FileIO.java | 17 +++++++++++++++
.../apache/paimon/catalog/FileSystemCatalog.java | 5 +++--
.../org/apache/paimon/utils/BranchManager.java | 6 +++---
.../java/org/apache/paimon/utils/FileUtils.java | 24 ++++++++++++++++++++++
4 files changed, 47 insertions(+), 5 deletions(-)
diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
b/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
index 84c1040ea..ae8ed60eb 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
@@ -37,6 +37,7 @@ import java.io.Serializable;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -102,6 +103,22 @@ public interface FileIO extends Serializable {
*/
FileStatus[] listStatus(Path path) throws IOException;
+ /**
+ * List the statuses of the directories in the given path if the path is a directory.
+ *
+ * <p>This default implementation calls {@link #listStatus(Path)} and keeps only the
+ * entries for which {@link FileStatus#isDir()} returns true. If {@code listStatus}
+ * returns {@code null}, that {@code null} is passed through unchanged, so callers
+ * must be prepared for a {@code null} result.
+ *
+ * <p>A {@link FileIO} implementation may override this method with an optimized
+ * way of listing directories.
+ *
+ * @param path given path
+ * @return the statuses of the directories in the given path, or {@code null} when
+ *     {@code listStatus(path)} returns {@code null}
+ * @throws IOException declared by this method; in the default implementation the
+ *     only call that can raise it is {@code listStatus(path)}
+ */
+ default FileStatus[] listDirectories(Path path) throws IOException {
+ FileStatus[] statuses = listStatus(path);
+ if (statuses != null) {
+ statuses =
Arrays.stream(statuses).filter(FileStatus::isDir).toArray(FileStatus[]::new);
+ }
+ return statuses;
+ }
+
/**
* Check if exists.
*
diff --git
a/paimon-core/src/main/java/org/apache/paimon/catalog/FileSystemCatalog.java
b/paimon-core/src/main/java/org/apache/paimon/catalog/FileSystemCatalog.java
index 8ffe0f271..1e4e5b0eb 100644
--- a/paimon-core/src/main/java/org/apache/paimon/catalog/FileSystemCatalog.java
+++ b/paimon-core/src/main/java/org/apache/paimon/catalog/FileSystemCatalog.java
@@ -59,7 +59,7 @@ public class FileSystemCatalog extends AbstractCatalog {
@Override
public List<String> listDatabases() {
List<String> databases = new ArrayList<>();
- for (FileStatus status : uncheck(() -> fileIO.listStatus(warehouse))) {
+ for (FileStatus status : uncheck(() ->
fileIO.listDirectories(warehouse))) {
Path path = status.getPath();
if (status.isDir() && isDatabase(path)) {
databases.add(database(path));
@@ -100,7 +100,8 @@ public class FileSystemCatalog extends AbstractCatalog {
@Override
protected List<String> listTablesImpl(String databaseName) {
List<String> tables = new ArrayList<>();
- for (FileStatus status : uncheck(() ->
fileIO.listStatus(newDatabasePath(databaseName)))) {
+ for (FileStatus status :
+ uncheck(() ->
fileIO.listDirectories(newDatabasePath(databaseName)))) {
if (status.isDir() && tableExists(status.getPath())) {
tables.add(status.getPath().getName());
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/utils/BranchManager.java
b/paimon-core/src/main/java/org/apache/paimon/utils/BranchManager.java
index 95034e05a..4656deb67 100644
--- a/paimon-core/src/main/java/org/apache/paimon/utils/BranchManager.java
+++ b/paimon-core/src/main/java/org/apache/paimon/utils/BranchManager.java
@@ -37,7 +37,7 @@ import java.util.PriorityQueue;
import java.util.SortedMap;
import java.util.stream.Collectors;
-import static org.apache.paimon.utils.FileUtils.listVersionedFileStatus;
+import static org.apache.paimon.utils.FileUtils.listVersionedDirectories;
import static org.apache.paimon.utils.Preconditions.checkArgument;
/** Manager for {@code Branch}. */
@@ -153,7 +153,7 @@ public class BranchManager {
/** Get branch count for the table. */
public long branchCount() {
try {
- return listVersionedFileStatus(fileIO, branchDirectory(),
BRANCH_PREFIX).count();
+ return listVersionedDirectories(fileIO, branchDirectory(),
BRANCH_PREFIX).count();
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -163,7 +163,7 @@ public class BranchManager {
public List<TableBranch> branches() {
try {
List<Pair<Path, Long>> paths =
- listVersionedFileStatus(fileIO, branchDirectory(),
BRANCH_PREFIX)
+ listVersionedDirectories(fileIO, branchDirectory(),
BRANCH_PREFIX)
.map(status -> Pair.of(status.getPath(),
status.getModificationTime()))
.collect(Collectors.toList());
PriorityQueue<TableBranch> pq =
diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/FileUtils.java
b/paimon-core/src/main/java/org/apache/paimon/utils/FileUtils.java
index eddc7273e..17823f34d 100644
--- a/paimon-core/src/main/java/org/apache/paimon/utils/FileUtils.java
+++ b/paimon-core/src/main/java/org/apache/paimon/utils/FileUtils.java
@@ -118,6 +118,30 @@ public class FileUtils {
.filter(status ->
status.getPath().getName().startsWith(prefix));
}
+ /**
+ * List versioned directories for the directory.
+ *
+ * <p>Returns an empty stream when {@code fileIO.exists(dir)} is false. Otherwise the
+ * entries returned by {@code fileIO.listDirectories(dir)} are filtered down to those
+ * whose path name starts with {@code prefix}.
+ *
+ * <p>NOTE(review): the exception message below names {@code listStatus} although the
+ * call made here is {@code listDirectories}; confirm whether the wording is intended.
+ *
+ * @param fileIO file IO used for the existence check and the directory listing
+ * @param dir directory whose sub-directories are listed
+ * @param prefix name prefix a listed directory must start with to be included
+ * @return stream of the file statuses of the matching directories; empty if
+ *     {@code dir} does not exist
+ * @throws IOException declared by this method; presumably raised by the underlying
+ *     {@code fileIO} calls
+ * @throws RuntimeException if {@code fileIO.listDirectories(dir)} returns {@code null}
+ */
+ public static Stream<FileStatus> listVersionedDirectories(
+ FileIO fileIO, Path dir, String prefix) throws IOException {
+ if (!fileIO.exists(dir)) {
+ return Stream.empty();
+ }
+
+ FileStatus[] statuses = fileIO.listDirectories(dir);
+
+ if (statuses == null) {
+ throw new RuntimeException(
+ String.format(
+ "The return value is null of the listStatus for
the '%s' directory.",
+ dir));
+ }
+
+ return Arrays.stream(statuses)
+ .filter(status ->
status.getPath().getName().startsWith(prefix));
+ }
+
public static void checkExists(FileIO fileIO, Path file) throws
IOException {
if (!fileIO.exists(file)) {
throw new FileNotFoundException(