jerryshao commented on code in PR #7009:
URL: https://github.com/apache/gravitino/pull/7009#discussion_r2052067823


##########
docs/manage-fileset-metadata-using-gravitino.md:
##########
@@ -429,34 +458,198 @@ 
catalog.as_fileset_catalog().create_fileset(ident=NameIdentifier.of("test_schema
 </TabItem>
 </Tabs>
 
-The value of `storageLocation` depends on the configuration settings of the 
catalog:
-- If this is a local fileset catalog, the `storageLocation` should be in the 
format of `file:///path/to/fileset`.
-- If this is a HDFS fileset catalog, the `storageLocation` should be in the 
format of `hdfs://namenode:port/path/to/fileset`.
-
-For a `MANAGED` fileset, the storage location is:
-
-1. The one specified by the user during the fileset creation, and the 
placeholder will be replaced by the
-   corresponding fileset property value.
-2. When the catalog property `location` is specified but the schema property 
`location` isn't specified, the storage location is:
-   1. `catalog location/schema name/fileset name` if `catalog location` does 
not contain any placeholder. 
-   2. `catalog location` - placeholders in the catalog location will be 
replaced by the corresponding fileset property value.
-
-3. When the catalog property `location` isn't specified but the schema 
property `location` is specified,
-   the storage location is:
-   1. `schema location/fileset name` if `schema location` does not contain any 
placeholder.
-   2. `schema location` - placeholders in the schema location will be replaced 
by the corresponding fileset property value.
-   
-4. When both the catalog property `location` and the schema property 
`location` are specified, the storage
-   location is:
-   1. `schema location/fileset name` if `schema location` does not contain any 
placeholder.
-   2. `schema location` - placeholders in the schema location will be replaced 
by the corresponding fileset property value.
-
-5. When both the catalog property `location` and schema property `location` 
isn't specified, the user
-   should specify the `storageLocation` in the fileset creation.
-
-For `EXTERNAL` fileset, users should specify `storageLocation` during the 
fileset creation,
-otherwise, Gravitino will throw an exception. If the `storageLocation` 
contains placeholders, the
-placeholder will be replaced by the corresponding fileset property value.
+#### storageLocations
+You can also create a fileset with multiple storage locations. The 
`storageLocations` is a map of location name to storage location.
+The generation rules of each location follow the generation rules of a single 
location.
+The following is an example of creating a fileset with multiple storage 
locations:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+# create a catalog first
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "name": "test_catalog",
+  "type": "FILESET",
+  "comment": "comment",
+  "provider": "hadoop",
+  "properties": {
+    "filesystem-providers": "builtin-local,builtin-hdfs,s3,gcs",
+    "location-l1": 
"file:///{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}",
+    "location-l2": 
"hdfs:///{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}"
+  }
+}' http://localhost:8090/api/metalakes/metalake/catalogs
+
+# create a schema under the catalog
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "name": "test_schema",
+  "comment": "comment",
+  "properties": {
+    "location-l3": 
"s3a://myBucket/{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}"
+  }
+}' http://localhost:8090/api/metalakes/metalake/catalogs/test_catalog/schemas
+
+# create a fileset by placeholders
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "name": "example_fileset",
+  "comment": "This is an example fileset",
+  "type": "MANAGED",
+  "storageLocations": {
+    "l4": "gs://myBucket/{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}"
+  },
+  "properties": {
+    "placeholder-project": "test_project",
+    "placeholder-user": "test_user",
+    "default-location-name": "l1"
+  }
+}' 
http://localhost:8090/api/metalakes/metalake/catalogs/test_catalog/schemas/test_schema/filesets
+
+# the fileset will be created with 4 storage locations:
+{
+  "name": "example_fileset",
+  "comment": "This is an example fileset",
+  "type": "MANAGED",
+  "storageLocation": null,
+  "storageLocations": {
+    "l1": "file:///test_catalog/test_schema/workspace_test_project/test_user",
+    "l2": "hdfs:///test_catalog/test_schema/workspace_test_project/test_user",
+    "l3": 
"s3a://myBucket/test_catalog/test_schema/workspace_test_project/test_user",
+    "l4": 
"gs://myBucket/test_catalog/test_schema/workspace_test_project/test_user"
+  },
+  "properties": {
+    "placeholder-project": "test_project",
+    "placeholder-user": "test_user",
+    "default-location-name": "l1"
+  }
+}
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+GravitinoClient gravitinoClient = GravitinoClient
+    .builder("http://localhost:8090")
+    .withMetalake("metalake")
+    .build();
+// create a catalog first
+Catalog catalog = gravitinoClient.createCatalog(
+    "test_catalog",
+    Type.FILESET,
+    "hadoop", // provider
+    "comment",
+    ImmutableMap.of(
+        "filesystem-providers", "builtin-local,builtin-hdfs,s3,gcs",
+        "location-l1", 
"file:///{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}",
+        "location-l2", 
"hdfs:///{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}"));
+FilesetCatalog filesetCatalog = catalog.asFilesetCatalog();
+
+// create a schema under the catalog
+filesetCatalog.createSchema(
+    "test_schema",
+    "comment",
+    ImmutableMap.of("location-l3", 
"s3a://myBucket/{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}"));
+
+// create a fileset by placeholders
+filesetCatalog.createMultipleLocationFileset(
+  NameIdentifier.of("test_schema", "example_fileset"),
+  "This is an example fileset",
+  Fileset.Type.MANAGED,
+  ImmutableMap.of("l4", 
"gs://myBucket/{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}"),
+  ImmutableMap.of(
+      "placeholder-project", "test_project", 
+      "placeholder-user", "test_user",
+      "default-location-name", "l1")
+);
+
+// the fileset will be created with 4 storage locations:
+{
+  "name": "example_fileset",
+  "comment": "This is an example fileset",
+  "type": "MANAGED",
+  "storageLocation": null,
+  "storageLocations": {
+    "l1": "file:///test_catalog/test_schema/workspace_test_project/test_user",
+    "l2": "hdfs:///test_catalog/test_schema/workspace_test_project/test_user",
+    "l3": 
"s3a://myBucket/test_catalog/test_schema/workspace_test_project/test_user",
+    "l4": 
"gs://myBucket/test_catalog/test_schema/workspace_test_project/test_user"
+  },
+  "properties": {
+    "placeholder-project": "test_project",
+    "placeholder-user": "test_user",
+    "default-location-name": "l1"
+  }
+}
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient = 
GravitinoClient(uri="http://localhost:8090", metalake_name="metalake")
+
+# create a catalog first
+catalog: Catalog = gravitino_client.create_catalog(
+   name="test_catalog",
+   catalog_type=Catalog.Type.FILESET,
+   provider="hadoop",
+   comment="comment",
+   properties={
+      "filesystem-providers": "builtin-local,builtin-hdfs,s3,gcs",
+      "location-l1": 
"file:///{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}",
+      "location-l2": 
"hdfs:///{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}",
+    }
+)
+
+# create a schema under the catalog
+catalog.as_schemas().create_schema(
+   name="test_schema",
+   comment="comment",
+   properties={
+      "location-l3": 
"s3a://myBucket/{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}",
+   }
+)
+
+# create a fileset by placeholders
+catalog.as_fileset_catalog().create_multiple_location_fileset(
+    ident=NameIdentifier.of("test_schema", "example_fileset"),
+    type=Fileset.Type.MANAGED,
+    comment="This is an example fileset",
+    storage_locations={
+        "l4": 
"gs://myBucket/{{catalog}}/{{schema}}/workspace_{{project}}/{{user}}",
+    },
+    properties={
+       "placeholder-project": "test_project",
+       "placeholder-user": "test_user",
+       "default-location-name": "l1",
+    }
+)
+
+# the fileset will be created with 4 storage locations:
+{
+  "name": "example_fileset",
+  "comment": "This is an example fileset",
+  "type": "MANAGED",
+  "storageLocation": null,
+  "storageLocations": {
+    "l1": "file:///test_catalog/test_schema/workspace_test_project/test_user",
+    "l2": "hdfs:///test_catalog/test_schema/workspace_test_project/test_user",
+    "l3": 
"s3a://myBucket/test_catalog/test_schema/workspace_test_project/test_user",
+    "l4": 
"gs://myBucket/test_catalog/test_schema/workspace_test_project/test_user"
+  },
+  "properties": {
+    "placeholder-project": "test_project",
+    "placeholder-user": "test_user",
+    "default-location-name": "l1"
+  }
+}
+```
+
+</TabItem>
+</Tabs>

Review Comment:
   Can you please add an example of how to use a fileset with multiple locations from the GVFS level?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to