This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new fce0bf0cef [#7793] improvement(fileset-catalog): Remove single file 
check in FilesetCatalogOperations (#7794)
fce0bf0cef is described below

commit fce0bf0cef3ef976e48bd4ded83c3eb22a41e8bc
Author: Mini Yu <[email protected]>
AuthorDate: Wed Aug 13 18:20:48 2025 +0800

    [#7793] improvement(fileset-catalog): Remove single file check in 
FilesetCatalogOperations (#7794)
    
    ### What changes were proposed in this pull request?
    
    Remove single file check logic in FilesetCatalogOperations.
    
    ### Why are the changes needed?
    
    It's not needed.
    
    Fix: #7793
    
    ### Does this PR introduce _any_ user-facing change?
    
    N/A.
    
    ### How was this patch tested?
    
    Existing tests.
---
 .../catalog/fileset/FilesetCatalogOperations.java  | 89 ++++++++++------------
 .../fileset/TestFilesetCatalogOperations.java      |  4 +-
 .../fileset/integration/test/FilesetCatalogIT.java | 17 +++++
 docs/fileset-catalog.md                            |  4 +-
 docs/manage-fileset-metadata-using-gravitino.md    |  4 +-
 5 files changed, 64 insertions(+), 54 deletions(-)

diff --git 
a/catalogs/catalog-fileset/src/main/java/org/apache/gravitino/catalog/fileset/FilesetCatalogOperations.java
 
b/catalogs/catalog-fileset/src/main/java/org/apache/gravitino/catalog/fileset/FilesetCatalogOperations.java
index d49c47c288..9f67123aa0 100644
--- 
a/catalogs/catalog-fileset/src/main/java/org/apache/gravitino/catalog/fileset/FilesetCatalogOperations.java
+++ 
b/catalogs/catalog-fileset/src/main/java/org/apache/gravitino/catalog/fileset/FilesetCatalogOperations.java
@@ -35,7 +35,6 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.time.Instant;
 import java.util.Arrays;
@@ -447,6 +446,15 @@ public class FilesetCatalogOperations extends 
ManagedSchemaOperations
 
           filesetPathsBuilder.put(entry.getKey(), formalizePath);
           FileSystem fs = getFileSystemWithCache(formalizePath, conf);
+
+          if (fs.exists(formalizePath) && 
fs.getFileStatus(formalizePath).isFile()) {
+            throw new IllegalArgumentException(
+                "Fileset location cannot be a file: "
+                    + formalizePath
+                    + ", location name: "
+                    + entry.getKey());
+          }
+
           if (!fs.exists(formalizePath)) {
             if (!fs.mkdirs(formalizePath)) {
               throw new RuntimeException(
@@ -680,24 +688,6 @@ public class FilesetCatalogOperations extends 
ManagedSchemaOperations
           "Location name %s does not exist in fileset %s", targetLocationName, 
ident);
     }
 
-    boolean isSingleFile = false;
-    if (disableFSOps) {
-      LOG.warn(
-          "Filesystem operations are disabled in the server side, we cannot 
check if the "
-              + "storage location mounts to a directory or single file, we 
assume it is a directory"
-              + "(in most of the cases). If it happens to be a single file, 
then the generated "
-              + "file location may be a wrong path. Please avoid using Fileset 
to manage a single"
-              + " file path.");
-    } else {
-      isSingleFile = checkSingleFile(fileset, targetLocationName);
-    }
-
-    // if the storage location is a single file, it cannot have sub path to 
access.
-    if (isSingleFile && StringUtils.isNotBlank(processedSubPath)) {
-      throw new GravitinoRuntimeException(
-          "Sub path should always be blank, because the fileset only mounts a 
single file.");
-    }
-
     // do checks for some data operations.
     if (hasCallerContext()) {
       Map<String, String> contextMap = 
CallerContext.CallerContextHolder.get().context();
@@ -713,14 +703,8 @@ public class FilesetCatalogOperations extends 
ManagedSchemaOperations
         FilesetDataOperation dataOperation = 
FilesetDataOperation.valueOf(operation);
         switch (dataOperation) {
           case RENAME:
-            // Fileset only mounts a single file, the storage location of the 
fileset cannot be
-            // renamed; Otherwise the metadata in the Gravitino server may be 
inconsistent.
-            if (isSingleFile) {
-              throw new GravitinoRuntimeException(
-                  "Cannot rename the fileset: %s which only mounts to a single 
file.", ident);
-            }
-            // if the sub path is blank, it cannot be renamed,
-            // otherwise the metadata in the Gravitino server may be 
inconsistent.
+            // if the sub path is blank, it cannot be renamed otherwise the 
metadata in the
+            // Gravitino server may be inconsistent.
             if (StringUtils.isBlank(processedSubPath)
                 || (processedSubPath.startsWith(SLASH) && 
processedSubPath.length() == 1)) {
               throw new GravitinoRuntimeException(
@@ -734,13 +718,12 @@ public class FilesetCatalogOperations extends 
ManagedSchemaOperations
     }
 
     String fileLocation;
-    // 1. if the storage location is a single file, we pass the storage 
location directly
-    // 2. if the processed sub path is blank, we pass the storage location 
directly
-    if (isSingleFile || StringUtils.isBlank(processedSubPath)) {
+    // If the processed sub path is blank, we pass the storage location 
directly
+    if (StringUtils.isBlank(processedSubPath)) {
       fileLocation = fileset.storageLocations().get(targetLocationName);
     } else {
       // the processed sub path always starts with "/" if it is not blank,
-      // so we can safely remove the tailing slash if storage location ends 
with "/".
+      // so we can safely remove the tailing slash if the storage location 
ends with "/".
       String storageLocation =
           
removeTrailingSlash(fileset.storageLocations().get(targetLocationName));
       fileLocation = String.format("%s%s", storageLocation, processedSubPath);
@@ -769,6 +752,14 @@ public class FilesetCatalogOperations extends 
ManagedSchemaOperations
           if (schemaPath != null && 
!containsPlaceholder(schemaPath.toString())) {
             try {
               FileSystem fs = getFileSystemWithCache(schemaPath, conf);
+              if (fs.exists(schemaPath) && 
fs.getFileStatus(schemaPath).isFile()) {
+                throw new IllegalArgumentException(
+                    "Fileset schema location cannot be a file: "
+                        + schemaPath
+                        + ", location name: "
+                        + locationName);
+              }
+
               if (!fs.exists(schemaPath)) {
                 if (!fs.mkdirs(schemaPath)) {
                   // Fail the operation when failed to create the schema path.
@@ -1057,6 +1048,24 @@ public class FilesetCatalogOperations extends 
ManagedSchemaOperations
             }
 
             checkPlaceholderValue(v);
+
+            if (!disableFSOps && !containsPlaceholder(v)) {
+              Path path = new Path(v);
+              FileSystem fs = getFileSystemWithCache(path, conf);
+              try {
+                if (fs.exists(path) && fs.getFileStatus(path).isFile()) {
+                  throw new IllegalArgumentException(
+                      "Fileset catalog location cannot be a file: "
+                          + v
+                          + ", location name: "
+                          + locationName);
+                }
+              } catch (IOException e) {
+                throw new RuntimeException(
+                    "Failed to check if fileset catalog location exists: " + 
v, e);
+              }
+            }
+
             catalogStorageLocations.put(locationName, new 
Path((ensureTrailingSlash(v))));
           }
         });
@@ -1321,24 +1330,6 @@ public class FilesetCatalogOperations extends 
ManagedSchemaOperations
         });
   }
 
-  private boolean checkSingleFile(Fileset fileset, String locationName) {
-    try {
-      Path locationPath = new 
Path(fileset.storageLocations().get(locationName));
-      FileSystem fileSystem = getFileSystemWithCache(locationPath, conf);
-      return fileSystem.getFileStatus(locationPath).isFile();
-    } catch (FileNotFoundException e) {
-      // We should always return false here, same with the logic in 
`FileSystem.isFile(Path f)`.
-      return false;
-    } catch (IOException e) {
-      throw new GravitinoRuntimeException(
-          e,
-          "Exception occurs when checking whether fileset: %s "
-              + "mounts a single file with location name: %s",
-          fileset.name(),
-          locationName);
-    }
-  }
-
   private String buildGVFSFilePath(
       String catalogName, String schemaName, String filesetName, String 
subPath) {
     String prefix = String.join(SLASH, "/fileset", catalogName, schemaName, 
filesetName);
diff --git 
a/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/TestFilesetCatalogOperations.java
 
b/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/TestFilesetCatalogOperations.java
index a98e54bd8e..3007f93a4f 100644
--- 
a/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/TestFilesetCatalogOperations.java
+++ 
b/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/TestFilesetCatalogOperations.java
@@ -1381,8 +1381,8 @@ public class TestFilesetCatalogOperations {
       CallerContext callerContext = 
CallerContext.builder().withContext(contextMap).build();
       CallerContext.CallerContextHolder.set(callerContext);
 
-      Assertions.assertThrows(
-          GravitinoRuntimeException.class, () -> 
ops.getFileLocation(filesetIdent, subPath));
+      // We have removed the check for single file in getFileLocation method.
+      Assertions.assertDoesNotThrow(() -> ops.getFileLocation(filesetIdent, 
subPath));
     } finally {
       CallerContext.CallerContextHolder.remove();
     }
diff --git 
a/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/integration/test/FilesetCatalogIT.java
 
b/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/integration/test/FilesetCatalogIT.java
index 66e3ac4ff9..b1a9efb14b 100644
--- 
a/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/integration/test/FilesetCatalogIT.java
+++ 
b/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/integration/test/FilesetCatalogIT.java
@@ -451,6 +451,23 @@ public class FilesetCatalogIT extends BaseIT {
         createFileset(filesetName3, "comment", null, storageLocation3, 
ImmutableMap.of("k1", "v1"));
     assertFilesetExists(filesetName3);
     Assertions.assertEquals(MANAGED, fileset3.type(), "fileset type should be 
MANAGED by default");
+
+    // Test create fileset with a single file
+    String filesetName7 = "test_create_fileset_with_a_file";
+    String storageLocation7 = storageLocation(filesetName7);
+    String filePath = storageLocation7 + "/file1.txt";
+    fileSystem.createNewFile(new Path(filePath));
+    Assertions.assertTrue(fileSystem.getFileStatus(new 
Path(filePath)).isFile());
+
+    Throwable throwable =
+        Assertions.assertThrows(
+            RuntimeException.class,
+            () -> {
+              createFileset(
+                  filesetName7, "comment", MANAGED, filePath, 
ImmutableMap.of("k1", "v1"));
+            });
+
+    Assertions.assertTrue(throwable.getMessage().contains("Fileset location 
cannot be a file"));
   }
 
   @Test
diff --git a/docs/fileset-catalog.md b/docs/fileset-catalog.md
index a34b32260e..6bb194a5f4 100644
--- a/docs/fileset-catalog.md
+++ b/docs/fileset-catalog.md
@@ -29,7 +29,7 @@ Besides the [common catalog 
properties](./gravitino-server-config.md#apache-grav
 
 | Property Name                        | Description                           
                                                                                
                                                                                
                                                                                
                              | Default Value   | Required | Since Version    |
 
|--------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|----------|------------------|
-| `location`                           | The storage location managed by 
Fileset catalog. Its location name is `unknown`.                                
                                                                                
                                                                                
                                    | (none)          | No       | 0.5.0        
    |
+| `location`                           | The storage location managed by 
Fileset catalog. Its location name is `unknown`. The value should always a 
directory(HDFS) or path prefix(cloud storage like S3, GCS.) and does not 
support a single file.                                                          
                                                | (none)          | No       | 
0.5.0            |
 | `location-`                          | The property prefix. User can use 
`location-{name}={path}` to set multiple locations with different names for the 
catalog.                                                                        
                                                                                
                                  | (none)          | No       | 
0.9.0-incubating |
 | `default-filesystem-provider`        | The default filesystem provider of 
this Fileset catalog if users do not specify the scheme in the URI. Candidate 
values are 'builtin-local', 'builtin-hdfs', 's3', 'gcs', 'abs' and 'oss'. 
Default value is `builtin-local`. For S3, if we set this value to 's3', we can 
omit the prefix 's3a://' in the location. | `builtin-local` | No       | 
0.7.0-incubating |
 | `filesystem-providers`               | The file system providers to add. 
Users need to set this configuration to support cloud storage or custom HCFS. 
For instance, set it to `s3` or a comma separated string that contains `s3` 
like `gs,s3` to support multiple kinds of fileset including `s3`.               
                                        | (none)          | Yes      | 
0.7.0-incubating |
@@ -115,7 +115,7 @@ The Fileset catalog supports creating, updating, deleting, 
and listing schema.
 
 | Property name                         | Description                          
                                                                                
     | Default value             | Required | Since Version    |
 
|---------------------------------------|---------------------------------------------------------------------------------------------------------------------------|---------------------------|----------|------------------|
-| `location`                            | The storage location managed by 
schema. Its location name is `unknown`.                                         
          | (none)                    | No       | 0.5.0            |
+| `location`                            | The storage location managed by 
schema. Its location name is `unknown`. It's also should be a directory or path 
prefix.   | (none)                    | No       | 0.5.0            |
 | `location-`                           | The property prefix. User can use 
`location-{name}={path}` to set multiple locations with different names for the 
schema. | (none)                    | No       | 0.9.0-incubating |
 | `authentication.impersonation-enable` | Whether to enable impersonation for 
this schema of the Fileset catalog.                                             
      | The parent(catalog) value | No       | 0.6.0-incubating |
 | `authentication.type`                 | The type of authentication for this 
schema of Fileset catalog , currently we only support `kerberos`, `simple`.     
      | The parent(catalog) value | No       | 0.6.0-incubating |
diff --git a/docs/manage-fileset-metadata-using-gravitino.md 
b/docs/manage-fileset-metadata-using-gravitino.md
index 7faa351a47..79929d6e95 100644
--- a/docs/manage-fileset-metadata-using-gravitino.md
+++ b/docs/manage-fileset-metadata-using-gravitino.md
@@ -117,8 +117,10 @@ Refer to [Drop a 
catalog](./manage-relational-metadata-using-gravitino.md#drop-a
 in relational catalog for more details. For a fileset catalog, the drop 
operation is the same.
 
 :::note
-Currently, Gravitino doesn't support dropping a catalog with schemas and 
filesets under it. You have
+- Currently, Gravitino doesn't support dropping a catalog with schemas and 
filesets under it. You have
 to drop all the schemas and filesets under the catalog before dropping the 
catalog.
+- The value of location property in a catalog should be a directory on a file 
system
+  (HDFS) or path prefix on a cloud storage (S3, GCS, etc.).** The same goes 
for schema and fileset.
 :::
 
 ### List all catalogs in a metalake

Reply via email to