This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 8ccb13e534 [#7961] feat(doc): Add the doc for job system (#8066)
8ccb13e534 is described below
commit 8ccb13e534a664874ab4cd9121f2aaff29b10e05
Author: Jerry Shao <[email protected]>
AuthorDate: Tue Aug 19 09:34:30 2025 +0800
[#7961] feat(doc): Add the doc for job system (#8066)
### What changes were proposed in this pull request?
Add the usage document and openapi document for job system.
### Why are the changes needed?
Fix: #7961
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
No need.
---
.../apache/gravitino/job/JobExecutorFactory.java | 2 +-
docs/index.md | 2 +
docs/manage-jobs-in-gravitino.md | 585 +++++++++++++++++
docs/open-api/jobs.yaml | 726 +++++++++++++++++++++
docs/open-api/openapi.yaml | 12 +
5 files changed, 1326 insertions(+), 1 deletion(-)
diff --git
a/core/src/main/java/org/apache/gravitino/job/JobExecutorFactory.java
b/core/src/main/java/org/apache/gravitino/job/JobExecutorFactory.java
index 7399957254..808c8c9d26 100644
--- a/core/src/main/java/org/apache/gravitino/job/JobExecutorFactory.java
+++ b/core/src/main/java/org/apache/gravitino/job/JobExecutorFactory.java
@@ -30,7 +30,7 @@ import org.apache.gravitino.job.local.LocalJobExecutorConfigs;
public class JobExecutorFactory {
- private static final String JOB_EXECUTOR_CONF_PREFIX = "gravitino.executor.";
+ private static final String JOB_EXECUTOR_CONF_PREFIX =
"gravitino.jobExecutor.";
private static final String JOB_EXECUTOR_CLASS_SUFFIX = ".class";
diff --git a/docs/index.md b/docs/index.md
index 3b45c6e871..501f9375fe 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -139,6 +139,8 @@ Gravitino provides governance features to manage metadata
in a unified way. See:
* [Manage tags in Gravitino](./manage-tags-in-gravitino.md): a complete guide
to using Gravitino
to manage tags.
+* [Manage jobs in Gravitino](./manage-jobs-in-gravitino.md): a complete guide
to using Gravitino
+ to manage jobs.
### Gravitino Iceberg REST catalog service
diff --git a/docs/manage-jobs-in-gravitino.md b/docs/manage-jobs-in-gravitino.md
new file mode 100644
index 0000000000..582d9a5cd2
--- /dev/null
+++ b/docs/manage-jobs-in-gravitino.md
@@ -0,0 +1,585 @@
+---
+title: "Manage jobs in Gravitino"
+slug: /manage-jobs-in-gravitino
+date: 2025-08-13
+keywords: job, job template, gravitino
+license: "This software is licensed under the Apache License version 2."
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Introduction
+
+Starting from 1.0.0, Apache Gravitino introduces a new submodule called the
job system for users to
+register, run, and manage jobs. This job system integrates with the existing
metadata
+management, enabling users to execute the jobs or actions based on the
metadata,
+known as metadata-driven actions. For instance, this allows users to run jobs
for tasks such as
+compacting Iceberg tables or cleaning old data based on TTL properties.
+
+The aim of the job system is to provide a unified way to manage job templates
and jobs,
+including registering job templates, running jobs based on the job templates,
and other related
+tasks. The job system itself is a unified job submitter that allows users to
run jobs through it,
+but it doesn't provide the actual job execution capabilities. Instead, it
relies on the
+existing job executors (schedulers), such as Apache Airflow, Apache Livy, to
execute the jobs.
+Gravitino's job system provides an extensible way to connect to different job
executors.
+
+:::note
+1. The job system is a new feature introduced in Gravitino 1.0.0, and it is
still under
+ development, so some features may not be fully implemented yet.
+2. The aim of the job system is not to replace the existing job executors. So,
it can only
+ support running a single job at a time, and it doesn't support job
scheduling for now.
+ :::
+
+## Job operations
+
+### Register a new job template
+
+Before running a job, the first step is to register a job template. Currently,
Gravitino
+supports two types of job templates: `shell` and `spark` (we will add more job
templates in the
+future).
+
+#### Shell job template
+
+The `shell` job template is used to run scripts; the executable can be a shell script, or
any executable
+script. The template is defined as follows:
+
+```json
+{
+ "name": "my_shell_job_template",
+ "jobType": "shell",
+ "comment": "A shell job template to run a script",
+ "executable": "/path/to/my_script.sh",
+ "arguments": ["{{arg1}}", "{{arg2}}"],
+ "environments": {
+ "ENV_VAR1": "{{value1}}",
+ "ENV_VAR2": "{{value2}}"
+ },
+ "customFields": {
+ "field1": "{{value1}}",
+ "field2": "{{value2}}"
+ },
+ "scripts": ["/path/to/script1.sh", "/path/to/script2.sh"]
+}
+```
+
+Here is a brief description of the fields in the job template:
+
+- `name`: The name of the job template, must be unique.
+- `jobType`: The type of the job template, use `shell` for a shell job
template.
+- `comment`: A comment for the job template, which can be used to describe the
job template.
+- `executable`: The path to the executable script, which can be a shell script
or any executable script.
+- `arguments`: The arguments to pass to the executable script, you can use
placeholders like `{{arg1}}`
+ and `{{arg2}}` to be replaced with actual values when running the job.
+- `environments`: The environment variables to set when running the job, you
can use placeholders like
+ `{{value1}}` and `{{value2}}` to be replaced with actual values when running
the job.
+- `customFields`: Custom fields for the job template, which can be used to
store additional
+ information, you can use placeholders like `{{value1}}` and `{{value2}}` to
be replaced with actual
+ values when running the job.
+- `scripts`: A list of scripts that the main executable script can use.
+
+Please note that:
+
+1. The `executable` and `scripts` must be accessible by the Gravitino server.
Currently,
+ Gravitino supports accessing files from the local file system, HTTP(S)
URLs, and FTP(S) URLs
+ (more distributed file system support will be added in the future). So the
`executable` and
+ `scripts` can be a local file path, or a URL like
`http://example.com/my_script.sh`.
+2. The `arguments`, `environments`, and `customFields` can use placeholders
like `{{arg1}}` and
+ `{{value1}}` to be replaced with actual values when running the job. The
placeholders will be
+ replaced with the actual values when running the job, so you can use them
to pass dynamic values
+ to the job template.
+3. Gravitino will copy the `executable` and `scripts` files to the job working
directory
+ when running the job, so you can use the relative path in the `executable`
and `scripts` to
+ refer to other scripts in the job working directory.
+
+#### Spark job template
+
+The `spark` job template is used to run Spark jobs; currently, it is a Spark application
JAR file for now.
+
+**Note** that the Spark job support is still under development, in 1.0.0, it
only supports
+registering a Spark job template, running a Spark job is not supported yet.
+
+The template is defined as follows:
+
+```json
+{
+ "name": "my_spark_job_template",
+ "jobType": "spark",
+ "comment": "A Spark job template to run a Spark application",
+ "executable": "/path/to/my_spark_app.jar",
+ "arguments": ["{{arg1}}", "{{arg2}}"],
+ "environments": {
+ "ENV_VAR1": "{{value1}}",
+ "ENV_VAR2": "{{value2}}"
+ },
+ "customFields": {
+ "field1": "{{value1}}",
+ "field2": "{{value2}}"
+ },
+ "className": "com.example.MySparkApp",
+ "jars": ["/path/to/dependency1.jar", "/path/to/dependency2.jar"],
+ "files": ["/path/to/file1.txt", "/path/to/file2.txt"],
+ "archives": ["/path/to/archive1.zip", "/path/to/archive2.zip"],
+ "configs": {
+ "spark.executor.memory": "2g",
+ "spark.executor.cores": "2"
+ }
+}
+```
+
+Here is a brief description of the fields in the Spark job template:
+
+- `name`: The name of the job template, which must be unique.
+- `jobType`: The type of the job template, use `spark` for Spark job template.
+- `comment`: A comment for the job template, which can be used to describe the
job template.
+- `executable`: The path to the Spark application JAR file, which can be a
local file path or a URL
+ with a supported scheme.
+- `arguments`: The arguments to pass to the Spark application, you can use
placeholders like
+ `{{arg1}}` and `{{arg2}}` to be replaced with actual values when running the
job.
+- `environments`: The environment variables to set when running the job, you
can use placeholders like
+ `{{value1}}` and `{{value2}}` to be replaced with actual values when running
the job.
+- `customFields`: Custom fields for the job template, which can be used to
store additional information.
+ It can use placeholders like `{{value1}}` and `{{value2}}` to be replaced
with actual values
+ when running the job.
+- `className`: The main class of the Spark application, it is required for
Spark job template.
+- `jars`: A list of JAR files to add to the Spark job classpath, which can be
a local file path or a URL
+ with a supported scheme.
+- `files`: A list of files to be copied to the working directory of the Spark
job, which can be a local
+ file path or a URL with a supported scheme.
+- `archives`: A list of archives to be extracted to the working directory of
the Spark job, which
+ can be a local file path or a URL with a supported scheme.
+- `configs`: A map of Spark configurations to set when running the Spark job,
which can use placeholders
+ like `{{value1}}` to be replaced with actual values when running the job.
+
+Note that:
+
+1. The `executable`, `jars`, `files`, and `archives` must be accessible by the
Gravitino server.
+ Currently, Gravitino supports accessing files from the local file system,
HTTP(S) URLs, and
+ FTP(S) URLs (more distributed file system support will be added in the
future). So the
+ `executable`, `jars`, `files`, and `archives` can be a local file path, or
a URL like
+ `http://example.com/my_spark_app.jar`.
+2. The `arguments`, `environments`, `customFields`, and `configs` can use
placeholders like
+ `{{arg1}}` and `{{value1}}` to be replaced with actual values when running
the job. The placeholders
+ will be replaced with the actual values when running the job, so you can
use them to pass dynamic
+ values to the job template.
+3. Gravitino will copy the `executable`, `jars`, `files`, and `archives` files
to the job working
+ directory when running the job, so you can use the relative path in the
`executable`, `jars`,
+ `files`, and `archives` to refer to other files in the job working
directory.
+4. The `className` is required for the Spark job template, it is the main
class of the Spark
+ application to be executed.
+
+To register a job template, you can use REST API or the Java and Python SDKs.
Here is the
+example to register a shell job template:
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jobTemplate": {
+ "name": "my_shell_job_template",
+ "jobType": "shell",
+ "comment": "A shell job template to run a script",
+ "executable": "/path/to/my_script.sh",
+ "arguments": ["{{arg1}}", "{{arg2}}"],
+ "environments": {
+ "ENV_VAR1": "{{value1}}",
+ "ENV_VAR2": "{{value2}}"
+ },
+ "customFields": {
+ "field1": "{{value1}}",
+ "field2": "{{value2}}"
+ },
+ "scripts": ["/path/to/script1.sh", "/path/to/script2.sh"]
+ }
+ }' \
+ http://localhost:8090/api/metalakes/test/jobs/templates
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ ShellJobTemplate jobTemplate = ShellJobTemplate.builder()
+ .name("my_shell_job_template")
+ .comment("A shell job template to run a script")
+ .executable("/path/to/my_script.sh")
+ .arguments(List.of("{{arg1}}", "{{arg2}}"))
+ .environments(Map.of("ENV_VAR1", "{{value1}}", "ENV_VAR2", "{{value2}}"))
+ .customFields(Map.of("field1", "{{value1}}", "field2", "{{value2}}"))
+ .scripts(List.of("/path/to/script1.sh", "/path/to/script2.sh"))
+ .build();
+
+ GravitinoClient client = ...;
+ client.registerJobTemplate(jobTemplate);
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ shell_job_template = (
+ ShellJobTemplate.builder()
+ .with_name("my_shell_job_template")
+ .with_comment("A shell job template to run a script")
+ .with_executable("/path/to/my_script.sh")
+ .with_arguments(["{{arg1}}", "{{arg2}}"])
+ .with_environments({"ENV_VAR1": "{{value1}}", "ENV_VAR2": "{{value2}}"})
+ .with_custom_fields({"field1": "{{value1}}", "field2": "{{value2}}"})
+ .with_scripts(["/path/to/script1.sh", "/path/to/script2.sh"])
+ .build()
+ )
+
+ client = GravitinoClient(...)
+ client.register_job_template(shell_job_template)
+```
+
+</TabItem>
+</Tabs>
+
+### List registered job templates
+
+You can list all the registered job templates under a metalake by using the
REST API or the Java
+and Python SDKs.
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+ http://localhost:8090/api/metalakes/test/jobs/templates
+
+Or using query parameter "details=true" to get more details of the job
templates:
+
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+ http://localhost:8090/api/metalakes/test/jobs/templates?details=true
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ GravitinoClient client = ...;
+ List<JobTemplate> detailedJobTemplates = client.listJobTemplates();
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ client = GravitinoClient(...)
+ detailed_job_templates = client.list_job_templates()
+```
+
+</TabItem>
+</Tabs>
+
+### Get a registered job template by name
+
+You can get a registered job template by its name using the REST API or the
Java and Python SDKs.
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+
http://localhost:8090/api/metalakes/test/jobs/templates/my_shell_job_template
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ GravitinoClient client = ...;
+ JobTemplate jobTemplate = client.getJobTemplate("my_shell_job_template");
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ client = GravitinoClient(...)
+ job_template = client.get_job_template("my_shell_job_template")
+```
+
+</TabItem>
+</Tabs>
+
+### Delete a registered job template by name
+
+You can delete a registered job template by its name using the REST API or the
Java and Python SDKs.
+
+Note that deleting a job template will also delete all the jobs that are using
this job template.
+If there are queued, started, or to be cancelled jobs that are using this job
template, the deletion
+will fail with an `InUseException` error.
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X DELETE -H "Accept: application/vnd.gravitino.v1+json" \
+
http://localhost:8090/api/metalakes/test/jobs/templates/my_shell_job_template
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ GravitinoClient client = ...;
+ client.deleteJobTemplate("my_shell_job_template");
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ client = GravitinoClient(...)
+ client.delete_job_template("my_shell_job_template")
+```
+
+</TabItem>
+</Tabs>
+
+### Run a job based on a job template
+
+To run a job based on the registered job template, you can use the REST API or
the Java and Python SDKs.
+When running a job, you need to provide the job template name and the
parameters to replace the
+placeholders in the job template.
+
+Gravitino leverages the job executor to run the job, so you need to specify
the job executor
+through configuration `gravitino.job.executor`. By default, it is set to
"local", which means
+the job will be launched as a process within the same machine that runs the
Gravitino server. Note
+that the local job executor is only for testing. If you want to run the job in
a distributed environment,
+you need to implement your own `JobExecutor` and set the configuration, please
see
+[Implement a custom job executor](#implement-a-custom-job-executor) section
below.
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jobTemplateName": "my_shell_job_template",
+ "jobConf": {
+ "arg1": "value1",
+ "arg2": "value2",
+ "value1": "env_value1",
+ "value2": "env_value2"
+ }
+ }' \
+ http://localhost:8090/api/metalakes/test/jobs/runs
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ GravitinoClient client = ...;
+ JobHandle jobHandle = client.runJob("my_shell_job_template",
ImmutableMap.of(
+ "arg1", "value1",
+ "arg2", "value2",
+ "value1", "env_value1",
+ "value2", "env_value2"
+ ));
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ client = GravitinoClient(...)
+ job_handle = client.run_job("my_shell_job_template", {
+ "arg1": "value1",
+ "arg2": "value2",
+ "value1": "env_value1",
+ "value2": "env_value2"
+ })
+```
+
+</TabItem>
+</Tabs>
+
+The returned `JobHandle` contains the job ID and other information about the
job. You can use the job ID to
+check the job status and cancel the job.
+
+The runJob API will return immediately after the job is submitted to the job
executor, and the job will be
+executed asynchronously. You can check the job status using the job ID
returned by the runJob API.
+
+### List all jobs
+
+You can list all the jobs under a metalake by using the REST API or the Java
and Python SDKs.
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+ http://localhost:8090/api/metalakes/test/jobs/runs
+
+Or using query parameter "jobTemplateName=my_shell_job_template" to filter
jobs by job template name:
+
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+
http://localhost:8090/api/metalakes/test/jobs/runs?jobTemplateName=my_shell_job_template
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ GravitinoClient client = ...;
+ List<JobHandle> jobHandles = client.listJobs();
+
+ // To filter jobs by job template name
+ List<JobHandle> filteredJobHandles =
client.listJobs("my_shell_job_template");
+
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ client = GravitinoClient(...)
+ job_handles = client.list_jobs()
+
+ # To filter jobs by job template name
+ filtered_job_handles =
client.list_jobs(job_template_name="my_shell_job_template")
+```
+
+</TabItem>
+</Tabs>
+
+### Get a job by job ID
+
+You can get a job by its job ID using the REST API or the Java and Python SDKs.
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+ http://localhost:8090/api/metalakes/test/jobs/runs/job-1234567890
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ GravitinoClient client = ...;
+ JobHandle jobHandle = client.getJob("job-1234567890");
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ client = GravitinoClient(...)
+ job_handle = client.get_job("job-1234567890")
+```
+
+</TabItem>
+</Tabs>
+
+### Cancel a job by job ID
+
+You can cancel a job by its job ID using the REST API or the Java and Python
SDKs.
+
+The job will be cancelled asynchronously, and the job status will be updated
to `CANCELLING` first,
+then to `CANCELLED` when the cancellation is completed. If the job is already
in `SUCCEEDED`,
+`FAILED`, `CANCELLING`, or `CANCELLED` status, the cancellation will be
ignored.
+
+The cancellation will be done by the job executor on a best-effort basis; it
relies on the job
+executor that supports cancellation. Also, because of the asynchronous nature
of the job
+cancellation, the job may not be actually cancelled.
+
+<Tabs groupId='language' queryString>
+<TabItem value="shell" label="Shell">
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ http://localhost:8090/api/metalakes/test/jobs/runs/job-1234567890
+```
+
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+ GravitinoClient client = ...;
+ client.cancelJob("job-1234567890");
+```
+
+</TabItem>
+<TabItem value="python" label="Python">
+
+```python
+ client = GravitinoClient(...)
+ client.cancel_job("job-1234567890")
+```
+
+</TabItem>
+</Tabs>
+
+### Configurations of the job system
+
+You can configure the job system through the `gravitino.conf` file. The
following are the
+default configurations:
+
+| Property name | Description
| Default value |
Required | Since Version |
+|----------------------------------------|-----------------------------------------------------------------------------------|-------------------------------|----------|---------------|
+| `gravitino.job.stagingDir` | Directory for managing the staging
files when running jobs | `/tmp/gravitino/jobs/staging`
| No | 1.0.0 |
+| `gravitino.job.executor` | The job executor to use for running
jobs | `local` |
No | 1.0.0 |
+| `gravitino.job.stagingDirKeepTimeInMs` | The time in milliseconds to keep
the staging directory after the job is completed | `604800000` (7 days)
| No | 1.0.0 |
+| `gravitino.job.statusPullIntervalInMs` | The interval in milliseconds to
pull the job status from the job executor | `300000` (5 minutes)
| No | 1.0.0 |
+
+
+#### Configurations for local job executor
+
+The local job executor is used for testing and development purposes; it runs
the job in the local process.
+The following are the default configurations for the local job executor:
+
+| Property name | Description
| Default value
| Required | Since Version |
+|-----------------------------------------------------|---------------------------------------------------------------------------|----------------------------------------|----------|---------------|
+| `gravitino.jobExecutor.local.waitingQueueSize` | The size of the
waiting queue for queued jobs in the local job executor | `100`
| No | 1.0.0 |
+| `gravitino.jobExecutor.local.maxRunningJobs` | The maximum number of
running jobs in the local job executor | `max(1, min(available
cores / 2, 10))` | No | 1.0.0 |
+| `gravitino.jobExecutor.local.jobStatusKeepTimeInMs` | The time in
milliseconds to keep the job status in the local job executor | `3600000` (1
hour) | No | 1.0.0 |
+
+### Implement a custom job executor
+
+Gravitino's job system is designed to be extensible, allowing you to implement
your own job executor
+to run jobs in a distributed environment. You can refer to the interface
`JobExecutor` in the
+code
[here](https://github.com/apache/gravitino/blob/main/core/src/main/java/org/apache/gravitino/connector/job/JobExecutor.java).
+
+After you implement your own job executor, you need to register it in the
Gravitino server by
+using the `gravitino.conf` file. For example, if you have implemented a job
executor named
+`airflow`, you need to configure it as follows:
+
+```
+gravitino.job.executor = airflow
+gravitino.jobExecutor.airflow.class = com.example.MyAirflowJobExecutor
+```
+
+You can also configure the job executor with additional properties, like:
+
+```
+gravitino.jobExecutor.airflow.host = http://localhost:8080
+gravitino.jobExecutor.airflow.username = myuser
+gravitino.jobExecutor.airflow.password = mypassword
+```
+
+These properties will be passed to the airflow job executor when it is
instantiated.
+
+## Future work
+
+The job system is a new feature introduced in Gravitino 1.0.0, and it still
needs more work:
+
+1. Support modification of job templates.
+2. Support running Spark jobs (Java and PySpark) based on the Spark job
template in the local job
+ executor.
+3. Support more job templates, like Python, SQL, etc.
+4. Support more job executors, like Apache Airflow, Apache Livy, etc.
+5. Support uploading job template related artifacts to the Gravitino server,
also support
+ downloading the artifacts from more distributed file systems like HDFS, S3,
etc.
+6. Support job scheduling, like running jobs periodically, or based on some
events.
diff --git a/docs/open-api/jobs.yaml b/docs/open-api/jobs.yaml
new file mode 100644
index 0000000000..f6d1ec2db6
--- /dev/null
+++ b/docs/open-api/jobs.yaml
@@ -0,0 +1,726 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+---
+
+paths:
+
+ /metalakes/{metalake}/jobs/templates:
+ parameters:
+ - $ref: "./openapi.yaml#/components/parameters/metalake"
+ get:
+ tags:
+ - job
+ summary: List job templates (names)
+ operationId: listJobTemplates
+ description: Returns the list of job templates in the specified metalake
+ parameters:
+ - $ref: "#/components/parameters/details"
+ responses:
+ "200":
+ description: Returns the list of job template objects if {details}
is true, otherwise returns the list of job template names
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ oneOf:
+ - $ref: "./openapi.yaml#/components/schemas/NameListResponse"
+ - $ref: "#/components/responses/JobTemplateListResponse"
+ examples:
+ NameListResponse:
+ $ref: "#/components/examples/NameListResponse"
+ JobTemplateListResponse:
+ $ref: "#/components/examples/JobTemplateListResponse"
+ "400":
+ $ref: "./openapi.yaml#/components/responses/BadRequestErrorResponse"
+ "404":
+ description: Not Found - The specified metalake does not exist
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+ NoSuchMetalakeException:
+ $ref:
"./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+ post:
+ tags:
+ - job
+ summary: Register a job template
+ operationId: registerJobTemplate
+ description: Registers a job template in the specified metalake
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/requests/JobTemplateRegisterRequest"
+ examples:
+ JobTemplateRegisterRequest:
+ $ref: "#/components/examples/JobTemplateRegisterRequest"
+
+ responses:
+ "200":
+ $ref: "./openapi.yaml#/components/responses/BaseResponse"
+ "409":
+ description: Conflict - The target job template already exists in
the specified metalake
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+ JobTemplateAlreadyExistsException:
+ $ref:
"#/components/examples/JobTemplateAlreadyExistsException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+
+ /metalakes/{metalake}/jobs/templates/{jobTemplate}:
+ parameters:
+ - $ref: "./openapi.yaml#/components/parameters/metalake"
+ - $ref: "#/components/parameters/jobTemplate"
+
+ get:
+ tags:
+ - job
+ summary: Get job template
+ operationId: getJobTemplate
+ description: Returns the specified job template information in the
specified metalake
+ responses:
+ "200":
+ description: Returns the job template object
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "#/components/responses/JobTemplateResponse"
+ examples:
+ JobTemplateResponse:
+ $ref: "#/components/examples/JobTemplateResponse"
+ "404":
+ description: Not Found - The specified job template does not exist
in the specified metalake
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+ NoSuchMetalakeException:
+ $ref:
"./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+ NoSuchJobTemplateException:
+ $ref: "#/components/examples/NoSuchJobTemplateException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+ delete:
+ tags:
+ - job
+ summary: Delete job template
+ operationId: deleteJobTemplate
+ responses:
+ "200":
+ $ref: "./openapi.yaml#/components/responses/DropResponse"
+ "400":
+ $ref: "./openapi.yaml#/components/responses/BadRequestErrorResponse"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+ /metalakes/{metalake}/jobs/runs:
+ parameters:
+ - $ref: "./openapi.yaml#/components/parameters/metalake"
+ get:
+ tags:
+ - job
+ summary: List jobs
+ operationId: listJobs
+ parameters:
+ - $ref: "#/components/parameters/jobTemplateName"
+ responses:
+ "200":
+ description: Returns the list of job objects
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "#/components/responses/JobListResponse"
+ examples:
+ JobListResponse:
+ $ref: "#/components/examples/JobListResponse"
+ "400":
+ $ref: "./openapi.yaml#/components/responses/BadRequestErrorResponse"
+ "404":
+ description: Not Found - The specified metalake does not exist
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+ NoSuchMetalakeException:
+ $ref:
"./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+ post:
+ tags:
+ - job
+      summary: Run a job
+ operationId: runJob
+ description: Runs a job based on the specified job template in the
specified metalake
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/requests/JobRunRequest"
+ examples:
+ JobRunRequest:
+ $ref: "#/components/examples/JobRunRequest"
+
+ responses:
+ "200":
+          description: Returns the job object representing the submitted job run
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "#/components/responses/JobResponse"
+ "404":
+ description: Not Found - The specified job template does not exist
in the specified metalake
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+                NoSuchJobTemplateException:
+ $ref: "#/components/examples/NoSuchJobTemplateException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+
+ /metalakes/{metalake}/jobs/runs/{jobId}:
+ parameters:
+ - $ref: "./openapi.yaml#/components/parameters/metalake"
+ - $ref: "#/components/parameters/jobId"
+
+ get:
+ tags:
+ - job
+ summary: Get job
+ operationId: getJob
+ description: Returns the specified job information in the specified
metalake
+ responses:
+ "200":
+ description: Returns the job object
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "#/components/responses/JobResponse"
+ examples:
+ JobResponse:
+ $ref: "#/components/examples/JobResponse"
+ "404":
+ description: Not Found - The specified job does not exist in the
specified metalake
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+ NoSuchMetalakeException:
+ $ref:
"./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+ NoSuchJobException:
+ $ref: "#/components/examples/NoSuchJobException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+ post:
+ tags:
+ - job
+ summary: Cancel job
+ operationId: cancelJob
+ description: Cancels the specified job in the specified metalake
+ responses:
+ "200":
+ description: Returns the job object
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "#/components/responses/JobResponse"
+ examples:
+ JobResponse:
+ $ref: "#/components/examples/JobResponse"
+ "404":
+ description: Not Found - The specified job does not exist in the
specified metalake
+ content:
+ application/vnd.gravitino.v1+json:
+ schema:
+ $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+ examples:
+ NoSuchMetalakeException:
+ $ref:
"./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+ NoSuchJobException:
+ $ref: "#/components/examples/NoSuchJobException"
+ "5xx":
+ $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+
+components:
+ parameters:
+ details:
+ name: details
+ in: query
+ description: Include detailed information about the job template
+ required: false
+ schema:
+ type: boolean
+ default: false
+ jobTemplate:
+ name: jobTemplate
+ in: path
+ description: The name of the job template
+ required: true
+ schema:
+ type: string
+ jobTemplateName:
+ name: jobTemplateName
+ in: query
+      description: The name of the job template used to filter the listed jobs
+ required: false
+ schema:
+ type: string
+ default: ""
+ jobId:
+ name: jobId
+ in: path
+ description: The unique identifier of the job
+ required: true
+ schema:
+ type: string
+
+ schemas:
+
+ JobTemplate:
+ oneOf:
+ - $ref: "#/components/schemas/ShellJobTemplate"
+ - $ref: "#/components/schemas/SparkJobTemplate"
+ discriminator:
+ propertyName: jobType
+ mapping:
+ shell: "#/components/schemas/ShellJobTemplate"
+ spark: "#/components/schemas/SparkJobTemplate"
+
+ ShellJobTemplate:
+ type: object
+ description: A job template object for shell jobs
+ required:
+ - name
+ - jobType
+ - executable
+ properties:
+ name:
+ type: string
+ description: The name of the job template
+ jobType:
+ type: string
+ description: The type of the job template
+ enum:
+ - "shell"
+ comment:
+ type: string
+ description: A comment about the job template
+ nullable: true
+ executable:
+ type: string
+ description: The executable command of the job template
+ arguments:
+ type: array
+ description: The arguments of the job template
+ items:
+ type: string
+ nullable: true
+ environments:
+ type: object
+ description: Configured string to string map of environment
variables for the job template
+ default: { }
+ additionalProperties:
+ type: string
+ customFields:
+ type: object
+ description: Configured string to string map of custom fields for
the job template
+ default: { }
+ additionalProperties:
+ type: string
+ scripts:
+ type: array
+ description: The scripts of the job template
+ items:
+ type: string
+ nullable: true
+ audit:
+ $ref: "./openapi.yaml#/components/schemas/Audit"
+
+ SparkJobTemplate:
+ type: object
+ description: A job template object for Spark jobs
+ required:
+ - name
+ - jobType
+ - mainClass
+ properties:
+ name:
+ type: string
+ description: The name of the job template
+ jobType:
+ type: string
+ description: The type of the job template
+ enum:
+ - "spark"
+ comment:
+ type: string
+ description: A comment about the job template
+ nullable: true
+ executable:
+ type: string
+        description: The executable of the Spark job template (e.g., the application JAR file)
+ arguments:
+ type: array
+ description: The arguments of the job template
+ items:
+ type: string
+ nullable: true
+ environments:
+ type: object
+ description: Configured string to string map of environment
variables for the job template
+ default: { }
+ additionalProperties:
+ type: string
+ customFields:
+ type: object
+ description: Configured string to string map of custom fields for
the job template
+ default: { }
+ additionalProperties:
+ type: string
+ mainClass:
+ type: string
+ description: The main class of the Spark job template
+ jars:
+ type: array
+ description: The JAR files of the Spark job template
+ items:
+ type: string
+ nullable: true
+ files:
+ type: array
+ description: The files of the Spark job template
+ items:
+ type: string
+ nullable: true
+ archives:
+ type: array
+ description: The archives of the Spark job template
+ items:
+ type: string
+ nullable: true
+ configs:
+ type: object
+ description: Configured string to string map of Spark
configurations for the job template
+ default: { }
+ additionalProperties:
+ type: string
+ audit:
+ $ref: "./openapi.yaml#/components/schemas/Audit"
+
+ Job:
+ type: object
+ description: A job object
+ required:
+ - jobId
+ - jobTemplateName
+ - status
+ - audit
+ properties:
+ jobId:
+ type: string
+ description: The unique identifier of the job
+ jobTemplateName:
+ type: string
+ description: The name of the job template used to create the job
+ status:
+ type: string
+ description: The status of the job
+ enum:
+ - "queued"
+ - "started"
+ - "failed"
+ - "succeeded"
+ - "cancelling"
+ - "canceled"
+ audit:
+ $ref: "./openapi.yaml#/components/schemas/Audit"
+
+ requests:
+
+ JobTemplateRegisterRequest:
+ type: object
+ required:
+ - jobTemplate
+ properties:
+ jobTemplate:
+ $ref: "#/components/schemas/JobTemplate"
+
+ JobRunRequest:
+ type: object
+ required:
+ - jobTemplateName
+ properties:
+ jobTemplateName:
+ type: string
+ description: The name of the job template to run
+ jobConf:
+ type: object
+ description: The job configuration for the job run
+ default: { }
+ additionalProperties:
+ type: string
+
+ responses:
+ JobTemplateListResponse:
+ type: object
+ properties:
+ code:
+ type: integer
+ format: int32
+ description: Status code of the response
+ enum:
+ - 0
+ jobTemplates:
+ type: array
+ description: A list of job template objects
+ items:
+ $ref: "#/components/schemas/JobTemplate"
+
+ JobTemplateResponse:
+ type: object
+ properties:
+ code:
+ type: integer
+ format: int32
+ description: Status code of the response
+ enum:
+ - 0
+ jobTemplate:
+ $ref: "#/components/schemas/JobTemplate"
+
+ JobListResponse:
+ type: object
+ properties:
+ code:
+ type: integer
+ format: int32
+ description: Status code of the response
+ enum:
+ - 0
+ jobs:
+ type: array
+ description: A list of job objects
+ items:
+ $ref: "#/components/schemas/Job"
+
+ JobResponse:
+ type: object
+ properties:
+ code:
+ type: integer
+ format: int32
+ description: Status code of the response
+ enum:
+ - 0
+ job:
+ $ref: "#/components/schemas/Job"
+
+ examples:
+ NameListResponse:
+ value: {
+ "code": 0,
+ "names": ["my_template1", "my_template2"]
+ }
+
+ JobTemplateListResponse:
+ value: {
+ "code": 0,
+ "jobTemplates": [
+ {
+ "arguments": [
+ "{{arg1}}",
+ "{{arg2}}"
+ ],
+ "audit": {
+ "createTime": "2025-08-12T02:14:28.205023Z",
+ "creator": "anonymous"
+ },
+ "comment": "Test shell job template",
+ "customFields": { },
+ "environments": {
+ "ENV_VAR": "{{env_var}}"
+ },
+ "executable":
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/test-job.sh",
+ "jobType": "shell",
+ "name": "test_run_get",
+ "scripts": [
+
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/common.sh"
+ ]
+ },
+ {
+ "arguments": [
+ "--arg1",
+ "{{arg2}}"
+ ],
+ "audit": {
+ "createTime": "2025-08-12T02:14:28.205023Z",
+ "creator": "anonymous"
+ },
+ "comment": "Test spark job template",
+ "customFields": { },
+ "environments": {
+ "ENV_VAR": "{{env_var}}"
+ },
+ "executable":
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/spark-demo.jar",
+ "jobType": "spark",
+ "name": "test_run_get_spark",
+ "mainClass": "org.apache.spark.examples.SparkPi",
+ "jars": [
+
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/spark-job.jar"
+ ]
+ }
+ ]
+ }
+
+ JobTemplateRegisterRequest:
+ value: {
+ "jobTemplate": {
+ "name": "test_run_get",
+ "jobType": "shell",
+ "comment": "Test shell job template",
+ "executable":
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/test-job.sh",
+ "arguments": ["{{arg1}}", "{{arg2}}"],
+ "environments": {
+ "ENV_VAR": "{{env_var}}"
+ },
+ "customFields": { },
+ "scripts": [
+
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/common.sh"
+ ]
+ }
+ }
+
+ JobRunRequest:
+ value: {
+ "jobTemplateName": "test_run_get",
+ "jobConf": {
+ "arg1": "value1",
+ "arg2": "value2"
+ }
+ }
+
+ JobTemplateResponse:
+ value: {
+ "code": 0,
+ "jobTemplate": {
+ "name": "test_run_get",
+ "jobType": "shell",
+ "comment": "Test shell job template",
+ "executable":
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/test-job.sh",
+ "arguments": ["{{arg1}}", "{{arg2}}"],
+ "environments": {
+ "ENV_VAR": "{{env_var}}"
+ },
+ "customFields": { },
+ "scripts": [
+
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/common.sh"
+ ],
+ "audit": {
+ "createTime": "2025-08-12T02:14:28.205023Z",
+ "creator": "anonymous"
+ }
+ }
+ }
+
+ JobListResponse:
+ value: {
+ "code": 0,
+ "jobs": [
+ {
+ "jobId": "job-12345",
+ "jobTemplateName": "test_run_get",
+ "status": "succeeded",
+ "audit": {
+ "createTime": "2025-08-12T02:14:28.205023Z",
+ "creator": "anonymous"
+ }
+ },
+ {
+ "jobId": "job-67890",
+ "jobTemplateName": "test_run_get_spark",
+ "status": "failed",
+ "audit": {
+ "createTime": "2025-08-12T02:14:28.205023Z",
+ "creator": "anonymous"
+ }
+ }
+ ]
+ }
+
+ JobResponse:
+ value: {
+ "code": 0,
+ "job": {
+ "jobId": "job-12345",
+ "jobTemplateName": "test_run_get",
+ "status": "succeeded",
+ "audit": {
+ "createTime": "2025-08-12T02:14:28.205023Z",
+ "creator": "anonymous"
+ }
+ }
+ }
+
+ JobTemplateAlreadyExistsException:
+ value: {
+ "code": 1004,
+ "type": "JobTemplateAlreadyExistsException",
+ "message": "Failed to operate job template(s) [my_job] operation
[REGISTER], reason [JobTemplateAlreadyExistsException]",
+ "stack": [
+ "org.apache.gravitino.exceptions.JobTemplateAlreadyExistsException:
job template xxx already exists",
+ "..."
+ ]
+ }
+
+ NoSuchJobTemplateException:
+ value: {
+ "code": 1003,
+ "type": "NoSuchJobTemplateException",
+ "message": "Failed to operate job template(s) [my_job] operation [GET]
under metalake [my_test_metalake], reason [NoSuchJobTemplateException]",
+ "stack": [
+ "org.apache.gravitino.exceptions.NoSuchJobTemplateException: Job
template xxx does not exist",
+ "..."
+ ]
+ }
+
+ NoSuchJobException:
+ value: {
+ "code": 1003,
+ "type": "NoSuchJobException",
+ "message": "Failed to operate job(s) [my_job] operation [GET] under
metalake [my_test_metalake], reason [NoSuchJobException]",
+ "stack": [
+ "org.apache.gravitino.exceptions.NoSuchJobException: Job xxx does
not exist",
+ "..."
+ ]
+ }
diff --git a/docs/open-api/openapi.yaml b/docs/open-api/openapi.yaml
index 2133e3c207..a802947ff3 100644
--- a/docs/open-api/openapi.yaml
+++ b/docs/open-api/openapi.yaml
@@ -185,6 +185,18 @@ paths:
/lineage:
$ref: "./lineage.yaml#/paths/~1lineage"
+ /metalakes/{metalake}/jobs/templates:
+ $ref: "./jobs.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1jobs~1templates"
+
+ /metalakes/{metalake}/jobs/templates/{jobTemplate}:
+ $ref:
"./jobs.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1jobs~1templates~1%7BjobTemplate%7D"
+
+ /metalakes/{metalake}/jobs/runs:
+ $ref: "./jobs.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1jobs~1runs"
+
+ /metalakes/{metalake}/jobs/runs/{jobId}:
+ $ref:
"./jobs.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1jobs~1runs~1%7BjobId%7D"
+
components:
schemas: