This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 8e48de3228 [#5933] doc(catalog-model): Add docs for model management
(#6052)
8e48de3228 is described below
commit 8e48de32287ab0c0b8b3bcfbd022a1ee00b5485e
Author: Jerry Shao <[email protected]>
AuthorDate: Tue Jan 7 09:32:26 2025 +0800
[#5933] doc(catalog-model): Add docs for model management (#6052)
### What changes were proposed in this pull request?
Add the docs for model management.
### Why are the changes needed?
This is part of work to support model management in Gravitino.
Fix: #5933
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
N/A
---
.../gravitino/client/GenericModelCatalog.java | 2 +-
.../gravitino/client/TestGenericModelCatalog.java | 3 +-
.../gravitino/client/generic_model_catalog.py | 2 +-
docs/assets/gravitino-model-arch.png | Bin 270264 -> 281743 bytes
docs/assets/metadata-model.png | Bin 102235 -> 0 bytes
docs/index.md | 7 +
docs/kafka-catalog.md | 2 +-
docs/manage-metalake-using-gravitino.md | 2 +-
docs/manage-model-metadata-using-gravitino.md | 637 +++++++++++++++++++++
docs/model-catalog.md | 87 +++
docs/open-api/models.yaml | 54 +-
docs/overview.md | 28 +-
.../gravitino/server/web/rest/ModelOperations.java | 2 +-
.../server/web/rest/TestModelOperations.java | 4 +
web/web/src/lib/api/models/index.js | 2 +-
15 files changed, 786 insertions(+), 46 deletions(-)
diff --git
a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericModelCatalog.java
b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericModelCatalog.java
index 9c1c4654d3..50e9eb246a 100644
---
a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericModelCatalog.java
+++
b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericModelCatalog.java
@@ -204,7 +204,7 @@ class GenericModelCatalog extends BaseSchemaCatalog
implements ModelCatalog {
NameIdentifier modelFullIdent = modelFullNameIdentifier(ident);
BaseResponse resp =
restClient.post(
- formatModelVersionRequestPath(modelFullIdent),
+ formatModelVersionRequestPath(modelFullIdent) + "/versions",
req,
BaseResponse.class,
Collections.emptyMap(),
diff --git
a/clients/client-java/src/test/java/org/apache/gravitino/client/TestGenericModelCatalog.java
b/clients/client-java/src/test/java/org/apache/gravitino/client/TestGenericModelCatalog.java
index 10e3ed678d..a3575988fc 100644
---
a/clients/client-java/src/test/java/org/apache/gravitino/client/TestGenericModelCatalog.java
+++
b/clients/client-java/src/test/java/org/apache/gravitino/client/TestGenericModelCatalog.java
@@ -380,7 +380,8 @@ public class TestGenericModelCatalog extends TestBase {
String modelVersionPath =
withSlash(
GenericModelCatalog.formatModelVersionRequestPath(
- NameIdentifier.of(METALAKE_NAME, CATALOG_NAME, "schema1",
"model1")));
+ NameIdentifier.of(METALAKE_NAME, CATALOG_NAME, "schema1",
"model1"))
+ + "/versions");
ModelVersionLinkRequest request =
new ModelVersionLinkRequest(
diff --git a/clients/client-python/gravitino/client/generic_model_catalog.py
b/clients/client-python/gravitino/client/generic_model_catalog.py
index ca6b5cd31f..89bf29be13 100644
--- a/clients/client-python/gravitino/client/generic_model_catalog.py
+++ b/clients/client-python/gravitino/client/generic_model_catalog.py
@@ -303,7 +303,7 @@ class GenericModelCatalog(BaseSchemaCatalog):
request.validate()
resp = self.rest_client.post(
- f"{self._format_model_version_request_path(model_full_ident)}",
+
f"{self._format_model_version_request_path(model_full_ident)}/versions",
request,
error_handler=MODEL_ERROR_HANDLER,
)
diff --git a/docs/assets/gravitino-model-arch.png
b/docs/assets/gravitino-model-arch.png
index de10689c07..5f43f1c29a 100644
Binary files a/docs/assets/gravitino-model-arch.png and
b/docs/assets/gravitino-model-arch.png differ
diff --git a/docs/assets/metadata-model.png b/docs/assets/metadata-model.png
deleted file mode 100644
index 143cb292bb..0000000000
Binary files a/docs/assets/metadata-model.png and /dev/null differ
diff --git a/docs/index.md b/docs/index.md
index 2bc4d53b3f..401e6c1d0a 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -56,6 +56,8 @@ REST API and the Java SDK. You can use either to manage
metadata. See
how to manage fileset metadata.
* [Manage messaging metadata using
Gravitino](./manage-messaging-metadata-using-gravitino.md) to learn how to
manage
messaging metadata.
+* [Manage model metadata using
Gravitino](./manage-model-metadata-using-gravitino.md) to learn how to manage
+ model metadata.
Also, you can find the complete REST API definition in
[Gravitino Open API](./api/rest/gravitino-rest-api),
@@ -88,6 +90,10 @@ Gravitino currently supports the following catalogs:
* [**Kafka catalog**](./kafka-catalog.md)
+**Model catalogs:**
+
+* [**Model catalog**](./model-catalog.md)
+
## Apache Gravitino playground
To experience Gravitino with other components easily, Gravitino provides a
playground to run. It
@@ -119,6 +125,7 @@ Gravitino supports different catalogs to manage the
metadata in different source
* [Hadoop catalog](./hadoop-catalog.md): a complete guide to using Gravitino
to manage fileset
using Hadoop Compatible File System (HCFS).
* [Kafka catalog](./kafka-catalog.md): a complete guide to using Gravitino to
manage Kafka topics metadata.
+* [Model catalog](./model-catalog.md): a complete guide to using Gravitino to
manage model metadata.
### Governance
diff --git a/docs/kafka-catalog.md b/docs/kafka-catalog.md
index 0c32bc59b7..b9901ec9d7 100644
--- a/docs/kafka-catalog.md
+++ b/docs/kafka-catalog.md
@@ -15,7 +15,7 @@ One Kafka catalog corresponds to one Kafka cluster.
### Catalog properties
-Besides the [common catalog
properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration),
the Kafka catalog has the following properties:
+Besides the [common catalog
properties](./gravitino-server-config.md#apache-gravitino-catalog-properties-configuration),
the Kafka catalog has the following properties:
| Property Name | Description
|
Default Value | Required | Since Version |
|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
diff --git a/docs/manage-metalake-using-gravitino.md
b/docs/manage-metalake-using-gravitino.md
index 09d5c2dc63..ae15bbbb15 100644
--- a/docs/manage-metalake-using-gravitino.md
+++ b/docs/manage-metalake-using-gravitino.md
@@ -11,7 +11,7 @@ import TabItem from '@theme/TabItem';
This page introduces how to create, modify, view, and delete
[metalakes](./glossary.md#metalake) by using Gravitino.
-## Prerequsites
+## Prerequisites
You have installed and launched Gravitino. For more details, see [Get
started](./getting-started.md).
diff --git a/docs/manage-model-metadata-using-gravitino.md
b/docs/manage-model-metadata-using-gravitino.md
new file mode 100644
index 0000000000..519f79b7f7
--- /dev/null
+++ b/docs/manage-model-metadata-using-gravitino.md
@@ -0,0 +1,637 @@
+---
+title: Manage model metadata using Gravitino
+slug: /manage-model-metadata-using-gravitino
+date: 2024-12-26
+keyword: Gravitino model metadata manage
+license: This software is licensed under the Apache License version 2.
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+This page introduces how to manage model metadata in Apache Gravitino.
Gravitino model catalog
+is a kind of model registry, which provides the ability to manage machine
learning models'
+versioned metadata. It follows the typical Gravitino 3-level namespace
(catalog, schema, and
+model) and supports managing the versions for each model.
+
+Currently, it supports model and model version registering, listing, loading,
and deleting.
+
+To use the model catalog, please make sure that:
+
+ - The Gravitino server has started, and is serving at, e.g.
[http://localhost:8090](http://localhost:8090).
+ - A metalake has been created and
[enabled](./manage-metalake-using-gravitino.md#enable-a-metalake)
+
+## Catalog operations
+
+### Create a catalog
+
+:::info
+For a model catalog, you must specify the catalog `type` as `MODEL` when
creating the catalog.
+Please also be aware that the `provider` is not required for a model catalog.
+:::
+
+You can create a catalog by sending a `POST` request to the
`/api/metalakes/{metalake_name}/catalogs`
+endpoint or just use the Gravitino Java/Python client. The following is an
example of creating a
+catalog:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+ "name": "model_catalog",
+ "type": "MODEL",
+ "comment": "This is a model catalog",
+ "properties": {
+ "k1": "v1"
+ }
+}' http://localhost:8090/api/metalakes/example/catalogs
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+GravitinoClient gravitinoClient = GravitinoClient
+ .builder("http://localhost:8090")
+ .withMetalake("example")
+ .build();
+
+Map<String, String> properties = ImmutableMap.<String, String>builder()
+ .put("k1", "v1")
+ .build();
+
+Catalog catalog = gravitinoClient.createCatalog(
+ "model_catalog",
+ Type.MODEL,
+ "This is a model catalog",
+ properties);
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+catalog = gravitino_client.create_catalog(name="model_catalog",
+ type=Catalog.Type.MODEL,
+ provider=None,
+ comment="This is a model catalog",
+ properties={"k1": "v1"})
+```
+
+</TabItem>
+</Tabs>
+
+### Load a catalog
+
+Refer to [Load a
catalog](./manage-relational-metadata-using-gravitino.md#load-a-catalog)
+in relational catalog for more details. For a model catalog, the load
operation is the same.
+
+### Alter a catalog
+
+Refer to [Alter a
catalog](./manage-relational-metadata-using-gravitino.md#alter-a-catalog)
+in relational catalog for more details. For a model catalog, the alter
operation is the same.
+
+### Drop a catalog
+
+Refer to [Drop a
catalog](./manage-relational-metadata-using-gravitino.md#drop-a-catalog)
+in relational catalog for more details. For a model catalog, the drop
operation is the same.
+
+### List all catalogs in a metalake
+
+Please refer to [List all catalogs in a
metalake](./manage-relational-metadata-using-gravitino.md#list-all-catalogs-in-a-metalake)
+in relational catalog for more details. For a model catalog, the list
operation is the same.
+
+### List all catalogs' information in a metalake
+
+Please refer to [List all catalogs' information in a
metalake](./manage-relational-metadata-using-gravitino.md#list-all-catalogs-information-in-a-metalake)
+in relational catalog for more details. For a model catalog, the list
operation is the same.
+
+## Schema operations
+
+`Schema` is a virtual namespace in a model catalog, which is used to organize
the models. It
+is similar to the concept of `schema` in the relational catalog.
+
+:::tip
+Users should create a metalake and a catalog before creating a schema.
+:::
+
+### Create a schema
+
+You can create a schema by sending a `POST` request to the
`/api/metalakes/{metalake_name}/catalogs/{catalog_name}/schemas`
+endpoint or just use the Gravitino Java/Python client. The following is an
example of creating a
+schema:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+ "name": "model_schema",
+ "comment": "This is a model schema",
+ "properties": {
+ "k1": "v1"
+ }
+}' http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+GravitinoClient gravitinoClient = GravitinoClient
+ .builder("http://localhost:8090")
+ .withMetalake("example")
+ .build();
+
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+
+SupportsSchemas supportsSchemas = catalog.asSchemas();
+
+Map<String, String> schemaProperties = ImmutableMap.<String, String>builder()
+ .put("k1", "v1")
+ .build();
+Schema schema = supportsSchemas.createSchema(
+ "model_schema",
+ "This is a schema",
+ schemaProperties);
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+catalog.as_schemas().create_schema(name="model_schema",
+ comment="This is a schema",
+ properties={"k1": "v1"})
+```
+
+</TabItem>
+</Tabs>
+
+### Load a schema
+
+Please refer to [Load a
schema](./manage-relational-metadata-using-gravitino.md#load-a-schema)
+in relational catalog for more details. For a model catalog, the schema load
operation is the
+same.
+
+### Alter a schema
+
+Please refer to [Alter a
schema](./manage-relational-metadata-using-gravitino.md#alter-a-schema)
+in relational catalog for more details. For a model catalog, the schema alter
operation is the
+same.
+
+### Drop a schema
+
+Please refer to [Drop a
schema](./manage-relational-metadata-using-gravitino.md#drop-a-schema)
+in relational catalog for more details. For a model catalog, the schema drop
operation is the
+same.
+
+Note that the drop operation will delete all the model metadata under this
schema if `cascade`
+is set to `true`.
+
+### List all schemas under a catalog
+
+Please refer to [List all schemas under a
catalog](./manage-relational-metadata-using-gravitino.md#list-all-schemas-under-a-catalog)
+in relational catalog for more details. For a model catalog, the schema list
operation is the
+same.
+
+## Model operations
+
+:::tip
+ - Users should create a metalake, a catalog, and a schema before creating a
model.
+:::
+
+### Register a model
+
+You can register a model by sending a `POST` request to the
`/api/metalakes/{metalake_name}
+/catalogs/{catalog_name}/schemas/{schema_name}/models` endpoint or just use
the Gravitino
+Java/Python client. The following is an example of registering a model:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+ "name": "example_model",
+ "comment": "This is an example model",
+ "properties": {
+ "k1": "v1"
+ }
+}'
http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+GravitinoClient gravitinoClient = GravitinoClient
+ .builder("http://localhost:8090")
+ .withMetalake("example")
+ .build();
+
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+Map<String, String> propertiesMap = ImmutableMap.<String, String>builder()
+ .put("k1", "v1")
+ .build();
+
+Model model = catalog.asModelCatalog().registerModel(
+ NameIdentifier.of("model_schema", "example_model"),
+ "This is an example model",
+ propertiesMap);
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+model: Model =
catalog.as_model_catalog().register_model(ident=NameIdentifier.of("model_schema",
"example_model"),
+ comment="This is an
example model",
+ properties={"k1":
"v1"})
+```
+
+</TabItem>
+</Tabs>
+
+### Get a model
+
+You can get a model by sending a `GET` request to the
`/api/metalakes/{metalake_name}
+/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}` endpoint
or by using the
+Gravitino Java/Python client. The following is an example of getting a model:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+Model model =
catalog.asModelCatalog().getModel(NameIdentifier.of("model_schema",
"example_model"));
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+model: Model =
catalog.as_model_catalog().get_model(ident=NameIdentifier.of("model_schema",
"example_model"))
+```
+
+</TabItem>
+</Tabs>
+
+### Delete a model
+
+You can delete a model by sending a `DELETE` request to the
`/api/metalakes/{metalake_name}
+/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}` endpoint
or by using the
+Gravitino Java/Python client. The following is an example of deleting a model:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X DELETE -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+catalog.asModelCatalog().deleteModel(NameIdentifier.of("model_schema",
"example_model"));
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+catalog.as_model_catalog().delete_model(NameIdentifier.of("model_schema",
"example_model"))
+```
+
+</TabItem>
+</Tabs>
+
+Note that the delete operation will delete all the model versions under this
model.
+
+### List models
+
+You can list all the models in a schema by sending a `GET` request to the
`/api/metalakes/
+{metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models` endpoint
or by using the
+Gravitino Java/Python client. The following is an example of listing all the
models in a schema:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+NameIdentifier[] identifiers =
catalog.asModelCatalog().listModels(Namespace.of("model_schema"));
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+model_list =
catalog.as_model_catalog().list_models(namespace=Namespace.of("model_schema"))
+```
+
+</TabItem>
+</Tabs>
+
+## ModelVersion operations
+
+:::tip
+ - Users should create a metalake, a catalog, a schema, and a model before
linking a model version
+ to the model.
+:::
+
+### Link a ModelVersion
+
+You can link a ModelVersion by sending a `POST` request to the
`/api/metalakes/{metalake_name}
+/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}/versions`
endpoint or by using
+the Gravitino Java/Python client. The following is an example of linking a
ModelVersion:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+ "uri": "path/to/model",
+ "aliases": ["alias1", "alias2"],
+ "comment": "This is version 0",
+ "properties": {
+ "k1": "v1"
+ }
+}'
http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model/versions
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+catalog.asModelCatalog().linkModelVersion(
+ NameIdentifier.of("model_schema", "example_model"),
+ "path/to/model",
+ new String[] {"alias1", "alias2"},
+ "This is version 0",
+ ImmutableMap.of("k1", "v1"));
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+catalog.as_model_catalog().link_model_version(model_ident=NameIdentifier.of("model_schema",
"example_model"),
+ uri="path/to/model",
+ aliases=["alias1", "alias2"],
+ comment="This is version 0",
+ properties={"k1": "v1"})
+```
+
+</TabItem>
+</Tabs>
+
+The comment and properties of ModelVersion can be different from the model.
+
+### Get a ModelVersion
+
+You can get a ModelVersion by sending a `GET` request to the
`/api/metalakes/{metalake_name}
+/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}/versions/{version_number}`
+endpoint or by using the Gravitino Java/Python client. The following is an
example of getting
+a ModelVersion:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model/versions/0
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+catalog.asModelCatalog().getModelVersion(NameIdentifier.of("model_schema",
"example_model"), 0);
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+catalog.as_model_catalog().get_model_version(model_ident=NameIdentifier.of("model_schema",
"example_model"), version=0)
+```
+
+</TabItem>
+</Tabs>
+
+### Get a ModelVersion by alias
+
+You can also get a ModelVersion by sending a `GET` request to the
`/api/metalakes/{metalake_name}
+/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}/aliases/{alias}`
endpoint or
+by using the Gravitino Java/Python client. The following is an example of
getting a ModelVersion
+by alias:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model/aliases/alias1
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+ModelVersion modelVersion =
catalog.asModelCatalog().getModelVersion(NameIdentifier.of("model_schema",
"example_model"), "alias1");
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+model_version: ModelVersion =
catalog.as_model_catalog().get_model_version_by_alias(model_ident=NameIdentifier.of("model_schema",
"example_model"), alias="alias1")
+```
+
+</TabItem>
+</Tabs>
+
+### Delete a ModelVersion
+
+You can delete a ModelVersion by sending a `DELETE` request to the
`/api/metalakes/{metalake_name}
+/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}/versions/{version_number}`
+endpoint or by using the Gravitino Java/Python client. The following is an
example of deleting
+a ModelVersion:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X DELETE -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model/versions/0
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+catalog.asModelCatalog().deleteModelVersion(NameIdentifier.of("model_schema",
"example_model"), 0);
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+catalog.as_model_catalog().delete_model_version(model_ident=NameIdentifier.of("model_schema",
"example_model"), version=0)
+```
+
+</TabItem>
+</Tabs>
+
+### Delete a ModelVersion by alias
+
+You can also delete a ModelVersion by sending a `DELETE` request to the
`/api/metalakes/
+{metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}/aliases/{alias}`
endpoint or
+by using the Gravitino Java/Python client. The following is an example of
deleting a ModelVersion
+by alias:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X DELETE -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model/aliases/alias1
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+catalog.asModelCatalog().deleteModelVersion(NameIdentifier.of("model_schema",
"example_model"), "alias1");
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+catalog.as_model_catalog().delete_model_version_by_alias(model_ident=NameIdentifier.of("model_schema",
"example_model"), alias="alias1")
+```
+
+</TabItem>
+</Tabs>
+
+### List ModelVersions
+
+You can list all the ModelVersions in a model by sending a `GET` request to
the `/api/metalakes/
+{metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}/versions`
endpoint
+or by using the Gravitino Java/Python client. The following is an example of
listing all the
+ModelVersions in a model:
+
+<Tabs groupId="language" queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" \
+http://localhost:8090/api/metalakes/example/catalogs/model_catalog/schemas/model_schema/models/example_model/versions
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+// ...
+Catalog catalog = gravitinoClient.loadCatalog("model_catalog");
+int[] modelVersions =
catalog.asModelCatalog().listModelVersions(NameIdentifier.of("model_schema",
"example_model"));
+// ...
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+gravitino_client: GravitinoClient =
GravitinoClient(uri="http://localhost:8090", metalake_name="example")
+
+catalog: Catalog = gravitino_client.load_catalog(name="model_catalog")
+model_versions: List[int] =
catalog.as_model_catalog().list_model_versions(model_ident=NameIdentifier.of("model_schema",
"example_model"))
+```
+
+</TabItem>
+</Tabs>
diff --git a/docs/model-catalog.md b/docs/model-catalog.md
new file mode 100644
index 0000000000..a9da0c8b3f
--- /dev/null
+++ b/docs/model-catalog.md
@@ -0,0 +1,87 @@
+---
+title: "Model catalog"
+slug: /model-catalog
+date: 2024-12-26
+keyword: model catalog
+license: "This software is licensed under the Apache License version 2."
+---
+
+## Introduction
+
+A Model catalog is a metadata catalog that provides the unified interface to
manage the metadata of
+machine learning models in a centralized way. It follows the typical Gravitino
3-level namespace
+(catalog, schema, and model) to manage the ML models metadata. In addition, it
supports
+managing the versions for each model.
+
+The advantages of using model catalog are:
+
+* Centralized management of ML models with user defined namespaces. Users can
better discover
+ and govern the models at the semantic level, rather than managing the model
files directly.
+* Version management for each model. Users can easily track the model versions
and manage the
+ model lifecycle.
+
+The key concept of model management is to manage the path (URI) of the model.
Instead of
+managing the model storage path physically and separately, model metadata
defines the mapping
+relation between the model name and the storage path. In the meantime, with
the support of
+extensible properties of model metadata, users can define the model metadata
with more detailed information
+rather than just the storage path.
+
+* **Model**: A model is a metadata object defined in the model catalog, to
manage an ML model. Each
+ model can have many **Model Versions**, and each version can have its own
properties. Models
+ can be retrieved by the name.
+* **ModelVersion**: The model version is a metadata object defined in the model
catalog, to manage each
+ version of the ML model. Each version has a unique version number, and can
have its own
+ properties and storage path. ModelVersion can be retrieved by the model name
and version
+ number. Also, each version can have a list of aliases, which can also be
used to retrieve the version.
+
+## Catalog
+
+### Catalog properties
+
+A Model catalog doesn't have specific properties. It uses the [common catalog
properties](./gravitino-server-config.md#apache-gravitino-catalog-properties-configuration).
+
+### Catalog operations
+
+Refer to [Catalog
operations](./manage-model-metadata-using-gravitino.md#catalog-operations) for
more details.
+
+## Schema
+
+### Schema capabilities
+
+Schema is the second level of the model catalog namespace, the model catalog
supports creating, updating, deleting, and listing schemas.
+
+### Schema properties
+
+Schema in the model catalog doesn't have predefined properties. Users can
define the properties for each schema.
+
+### Schema operations
+
+Refer to [Schema
operation](./manage-model-metadata-using-gravitino.md#schema-operations) for
more details.
+
+## Model
+
+### Model capabilities
+
+The Model catalog supports registering, listing and deleting models and model
versions.
+
+### Model properties
+
+Model doesn't have predefined properties. Users can define the properties for
each model and model version.
+
+### Model operations
+
+Refer to [Model
operation](./manage-model-metadata-using-gravitino.md#model-operations) for
more details.
+
+## ModelVersion
+
+### ModelVersion capabilities
+
+The Model catalog supports linking, listing and deleting model versions.
+
+### ModelVersion properties
+
+ModelVersion doesn't have predefined properties. Users can define the
properties for each version.
+
+### ModelVersion operations
+
+Refer to [ModelVersion
operations](./manage-model-metadata-using-gravitino.md#modelversion-operations)
for more details.
diff --git a/docs/open-api/models.yaml b/docs/open-api/models.yaml
index 713a7037cd..652923286b 100644
--- a/docs/open-api/models.yaml
+++ b/docs/open-api/models.yaml
@@ -122,6 +122,33 @@ paths:
"5xx":
$ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}/models/{model}/versions:
+ parameters:
+ - $ref: "./openapi.yaml#/components/parameters/metalake"
+ - $ref: "./openapi.yaml#/components/parameters/catalog"
+ - $ref: "./openapi.yaml#/components/parameters/schema"
+ - $ref: "./openapi.yaml#/components/parameters/model"
+
+ get:
+ tags:
+ - model
+ summary: List model versions
+ operationId: listModelVersions
+ responses:
+ "200":
+ $ref: "#/components/responses/ModelVersionListResponse"
+ "404":
+ description: Not Found - The target model does not exist
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+ NoSuchModelException:
+ $ref: "#/components/examples/NoSuchModelException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
post:
tags:
- model
@@ -159,33 +186,6 @@ paths:
"5xx":
$ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
-
/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}/models/{model}/versions:
- parameters:
- - $ref: "./openapi.yaml#/components/parameters/metalake"
- - $ref: "./openapi.yaml#/components/parameters/catalog"
- - $ref: "./openapi.yaml#/components/parameters/schema"
- - $ref: "./openapi.yaml#/components/parameters/model"
-
- get:
- tags:
- - model
- summary: List model versions
- operationId: listModelVersions
- responses:
- "200":
- $ref: "#/components/responses/ModelVersionListResponse"
- "404":
- description: Not Found - The target model does not exist
- content:
- application/vnd.gravitino.v1+json:
- schema:
- $ref: "./openapi.yaml#/components/schemas/ErrorModel"
- examples:
- NoSuchModelException:
- $ref: "#/components/examples/NoSuchModelException"
- "5xx":
- $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
-
/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}/models/{model}/versions/{version}:
parameters:
- $ref: "./openapi.yaml#/components/parameters/metalake"
diff --git a/docs/overview.md b/docs/overview.md
index 2b215412ed..17d0ee48e3 100644
--- a/docs/overview.md
+++ b/docs/overview.md
@@ -37,7 +37,7 @@ For example, relational metadata models for tabular data,
like Hive, MySQL, Post
File metadata model for all the unstructured data, like HDFS, S3, and others.
Besides the unified metadata models, Gravitino also provides a unified
metadata governance layer
-(WIP) to manage the metadata in a unified way, including access control,
auditing, discovery and
+to manage the metadata in a unified way, including access control, auditing,
discovery and
others.
### Direct metadata management
@@ -63,24 +63,28 @@ change the existing SQL dialects.
In the meantime, other query engine support is on the roadmap, including
[Apache Spark](https://spark.apache.org/), [Apache
Flink](https://flink.apache.org/) and others.
-### AI asset management (WIP)
+### AI asset management
-The goal of Gravitino is to unify the data management in both data and AI
assets. The support of AI
-assets like models, features, and others are under development.
+The goal of Gravitino is to unify the management of both data and AI
assets, including raw files, models, etc.
## Terminology
-### The model of Apache Gravitino
+### The metadata object of Apache Gravitino
-
-
-* **Metalake**: The top-level container for metadata. Typically, one group has
one metalake
- to manage all the metadata in it. Each metalake exposes a three-level
namespace(catalog.schema.
+* **Metalake**: The container/tenant for metadata. Typically, one group has
one metalake
+ to manage all the metadata in it. Each metalake exposes a three-level
namespace (catalog.schema.
table) to organize the data.
* **Catalog**: A catalog is a collection of metadata from a specific metadata
source.
Each catalog has a related connector to connect to the specific metadata
source.
-* **Schema**: A schema is equivalent to a database, Schemas only exist in the
specific catalogs
- that support relational metadata sources, such as Apache Hive, MySQL,
PostgreSQL, and others.
+* **Schema**: Schema is the second-level namespace to group a collection of
metadata. A schema can
+ refer to the database/schema in the relational metadata sources, such as
Apache Hive, MySQL,
+ PostgreSQL, and others. A schema can also refer to the logical namespace for the
fileset and model
+ catalogs.
* **Table**: The lowest level in the object hierarchy for catalogs that
support relational
metadata sources. You can create Tables in specific schemas in the catalogs.
-* **Model**: The model represents the metadata in the specific catalogs that
support model management.
+* **Fileset**: The fileset metadata object refers to a collection of files and
directories in
+ the file system. The fileset metadata object is used to manage the logical
metadata for the files.
+* **Model**: The model metadata object represents the metadata in the specific
catalogs that
+ support model management.
+* **Topic**: The topic metadata object represents the metadata in the specific
catalogs that
+ support managing the topic for a message queue system, such as Kafka.
diff --git
a/server/src/main/java/org/apache/gravitino/server/web/rest/ModelOperations.java
b/server/src/main/java/org/apache/gravitino/server/web/rest/ModelOperations.java
index fd50782108..e4b80d0526 100644
---
a/server/src/main/java/org/apache/gravitino/server/web/rest/ModelOperations.java
+++
b/server/src/main/java/org/apache/gravitino/server/web/rest/ModelOperations.java
@@ -286,7 +286,7 @@ public class ModelOperations {
}
@POST
- @Path("{model}")
+ @Path("{model}/versions")
@Produces("application/vnd.gravitino.v1+json")
@Timed(name = "link-model-version." + MetricNames.HTTP_PROCESS_DURATION,
absolute = true)
@ResponseMetered(name = "link-model-version", absolute = true)
diff --git
a/server/src/test/java/org/apache/gravitino/server/web/rest/TestModelOperations.java
b/server/src/test/java/org/apache/gravitino/server/web/rest/TestModelOperations.java
index 42e48d0302..c383a07a46 100644
---
a/server/src/test/java/org/apache/gravitino/server/web/rest/TestModelOperations.java
+++
b/server/src/test/java/org/apache/gravitino/server/web/rest/TestModelOperations.java
@@ -601,6 +601,7 @@ public class TestModelOperations extends JerseyTest {
Response resp =
target(modelPath())
.path("model1")
+ .path("versions")
.request(MediaType.APPLICATION_JSON_TYPE)
.accept("application/vnd.gravitino.v1+json")
.post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE));
@@ -619,6 +620,7 @@ public class TestModelOperations extends JerseyTest {
Response resp1 =
target(modelPath())
.path("model1")
+ .path("versions")
.request(MediaType.APPLICATION_JSON_TYPE)
.accept("application/vnd.gravitino.v1+json")
.post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE));
@@ -637,6 +639,7 @@ public class TestModelOperations extends JerseyTest {
Response resp2 =
target(modelPath())
.path("model1")
+ .path("versions")
.request(MediaType.APPLICATION_JSON_TYPE)
.accept("application/vnd.gravitino.v1+json")
.post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE));
@@ -656,6 +659,7 @@ public class TestModelOperations extends JerseyTest {
Response resp3 =
target(modelPath())
.path("model1")
+ .path("versions")
.request(MediaType.APPLICATION_JSON_TYPE)
.accept("application/vnd.gravitino.v1+json")
.post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE));
diff --git a/web/web/src/lib/api/models/index.js
b/web/web/src/lib/api/models/index.js
index fa968326d1..74d2e0d368 100644
--- a/web/web/src/lib/api/models/index.js
+++ b/web/web/src/lib/api/models/index.js
@@ -45,7 +45,7 @@ const Apis = {
LINK_VERSION: ({ metalake, catalog, schema, model }) =>
`/api/metalakes/${encodeURIComponent(metalake)}/catalogs/${encodeURIComponent(
catalog
-
)}/schemas/${encodeURIComponent(schema)}/models/${encodeURIComponent(model)}`,
+
)}/schemas/${encodeURIComponent(schema)}/models/${encodeURIComponent(model)}/versions`,
DELETE_VERSION: ({ metalake, catalog, schema, model, version }) => {
return
`/api/metalakes/${encodeURIComponent(metalake)}/catalogs/${encodeURIComponent(
catalog