This is an automated email from the ASF dual-hosted git repository.

yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new e29bdb138c [#8896] improve(doc): clarify className is optional for 
Spark jobs (#9284)
e29bdb138c is described below

commit e29bdb138cae6cf3d9b1d3e5e3ef56ec7fb9417b
Author: Jerry Shao <[email protected]>
AuthorDate: Mon Dec 1 20:37:25 2025 +0800

    [#8896] improve(doc): clarify className is optional for Spark jobs (#9284)
    
    ### What changes were proposed in this pull request?
    
    This PR improves the documentation for Spark job templates to clarify
    that the `className` field is required for Java/Scala Spark applications
    but optional for PySpark applications.
    
    ### Why are the changes needed?
    
    Issue #8896 identified that:
    1. The `className` field could be null or empty when registering or
    updating Spark job templates
    2. It was unclear whether this field should be validated as required
    3. Documentation did not clearly state that `className` is only required
    for Java/Scala jobs, not PySpark jobs
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, this PR:
    - **Improves documentation** in Java API, Python API, and OpenAPI specs
    to clarify `className` requirements
    - **Fixes validation bugs** in Python client that incorrectly required
    `className` for all Spark jobs
    - **Fixes OpenAPI schema** that had incorrect field name (`mainClass` →
    `className`) and requirements
    
    ### How was this patch tested?
    
    - Existing tests pass: `./gradlew :common:test --tests
    TestJobTemplateDTO`
    - Changes are limited to documentation updates and validation fixes; no
    other functional code changes
    - The existing test at line 388 of TestJobTemplateDTO already validates
    that null `className` is accepted
    
    ### Changes:
    - Updated Java API and DTO Javadocs to clarify `className` optionality
    - Updated Python API and DTO docstrings with same clarifications
    - Fixed Python validation bugs that incorrectly required `className`
    - Fixed OpenAPI documentation: corrected field name, requirements, and
    descriptions
    - Removed the redundant `validate()` method override in the Python DTO to
    fix linting
    
    Closes #8896
    
    ---------
    
    Co-authored-by: Copilot <[email protected]>
---
 .../org/apache/gravitino/job/SparkJobTemplate.java     | 13 +++++++++++--
 .../gravitino/api/job/spark_job_template.py            | 18 ++++++++++++++----
 .../gravitino/dto/job/spark_job_template_dto.py        | 16 +++++++---------
 .../apache/gravitino/dto/job/SparkJobTemplateDTO.java  |  7 ++++++-
 docs/open-api/jobs.yaml                                | 11 ++++++-----
 5 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/api/src/main/java/org/apache/gravitino/job/SparkJobTemplate.java 
b/api/src/main/java/org/apache/gravitino/job/SparkJobTemplate.java
index 1402dc455b..145f9cf5aa 100644
--- a/api/src/main/java/org/apache/gravitino/job/SparkJobTemplate.java
+++ b/api/src/main/java/org/apache/gravitino/job/SparkJobTemplate.java
@@ -43,6 +43,9 @@ import java.util.Objects;
  * These resources must be accessible to the Gravitino server, and can be 
located in the local file
  * system, on a web server (e.g., HTTP, HTTPS, FTP). Distributed file systems 
like HDFS or S3 will
  * be supported in the future.
+ *
+ * <p>Note: The {@code className} field is required for Java/Scala Spark 
applications but is
+ * optional for PySpark applications. For PySpark jobs, you can set this field 
to {@code null}.
  */
 public class SparkJobTemplate extends JobTemplate {
 
@@ -73,7 +76,10 @@ public class SparkJobTemplate extends JobTemplate {
   /**
    * Returns the class name of the Spark application to be executed.
    *
-   * @return the class name
+   * <p>This field is required for Java/Scala Spark applications but optional 
for PySpark
+   * applications. For PySpark jobs, this may return {@code null}.
+   *
+   * @return the class name, or {@code null} for PySpark applications
    */
   public String className() {
     return className;
@@ -208,7 +214,10 @@ public class SparkJobTemplate extends JobTemplate {
     /**
      * Sets the class name of the Spark application to be executed.
      *
-     * @param className the class name
+     * <p>This field is required for Java/Scala Spark applications but 
optional for PySpark
+     * applications. For PySpark jobs, you can set this to {@code null} or 
omit this call.
+     *
+     * @param className the class name, or {@code null} for PySpark 
applications
      * @return the builder instance for method chaining
      */
     public Builder withClassName(String className) {
diff --git a/clients/client-python/gravitino/api/job/spark_job_template.py 
b/clients/client-python/gravitino/api/job/spark_job_template.py
index c731ccadd5..6d49d65994 100644
--- a/clients/client-python/gravitino/api/job/spark_job_template.py
+++ b/clients/client-python/gravitino/api/job/spark_job_template.py
@@ -23,6 +23,9 @@ from .job_template import JobTemplate, JobType
 class SparkJobTemplate(JobTemplate):
     """
     Represents a job template for executing Spark jobs.
+
+    Note: The class_name field is required for Java/Scala Spark applications 
but is
+    optional for PySpark applications. For PySpark jobs, this field can be 
None.
     """
 
     def __init__(self, builder: "SparkJobTemplate.Builder"):
@@ -41,8 +44,11 @@ class SparkJobTemplate(JobTemplate):
         """
         Returns the class name of the Spark job.
 
+        This field is required for Java/Scala Spark applications but optional 
for PySpark
+        applications. For PySpark jobs, this may return None.
+
         Returns:
-            the class name as a string.
+            the class name as a string, or None for PySpark applications.
         """
         return self._class_name
 
@@ -188,8 +194,11 @@ class SparkJobTemplate(JobTemplate):
             """
             Sets the class name for this Spark job template.
 
+            This field is required for Java/Scala Spark applications but 
optional for PySpark
+            applications. For PySpark jobs, you can omit this call or set it 
to None.
+
             Args:
-                class_name: The fully qualified name of the Spark job class.
+                class_name: The fully qualified name of the Spark job class, 
or None for PySpark applications.
 
             Returns:
                 The builder instance for method chaining.
@@ -252,10 +261,11 @@ class SparkJobTemplate(JobTemplate):
         def validate(self):
             """
             Validates the SparkJobTemplate properties.
+
+            Note: className is required for Java/Scala Spark applications but 
optional for PySpark.
             """
             super().validate()
-            if not self.class_name or not self.class_name.strip():
-                raise ValueError("Class name must not be null or empty")
+            # className is optional - required for Java/Scala but not for 
PySpark
             self.jars = self.jars or []
             self.files = self.files or []
             self.archives = self.archives or []
diff --git a/clients/client-python/gravitino/dto/job/spark_job_template_dto.py 
b/clients/client-python/gravitino/dto/job/spark_job_template_dto.py
index 8eb0a71c6c..f72b51619a 100644
--- a/clients/client-python/gravitino/dto/job/spark_job_template_dto.py
+++ b/clients/client-python/gravitino/dto/job/spark_job_template_dto.py
@@ -27,9 +27,14 @@ from ...api.job.job_template import JobType
 @register_job_template(JobType.SPARK)
 @dataclass
 class SparkJobTemplateDTO(JobTemplateDTO):
-    """Represents a Spark Job Template Data Transfer Object (DTO)."""
+    """
+    Represents a Spark Job Template Data Transfer Object (DTO).
 
-    _class_name: str = field(default=None, 
metadata=config(field_name="className"))
+    Note: The class_name field is required for Java/Scala Spark applications 
but is
+    optional for PySpark applications. For PySpark jobs, this field can be 
None.
+    """
+
+    _class_name: Optional[str] = field(default=None, 
metadata=config(field_name="className"))
     _jars: Optional[List[str]] = field(default=None, 
metadata=config(field_name="jars"))
     _files: Optional[List[str]] = field(
         default=None, metadata=config(field_name="files")
@@ -60,10 +65,3 @@ class SparkJobTemplateDTO(JobTemplateDTO):
     def configs(self) -> Optional[Dict[str, str]]:
         """Returns the configuration properties for the Spark job."""
         return self._configs
-
-    def validate(self) -> None:
-        """Validates the SparkJobTemplateDTO. Ensures that required fields are 
not null or empty."""
-        super().validate()
-
-        if self._class_name is None or not self._class_name.strip():
-            raise ValueError('"className" is required and cannot be empty')
diff --git 
a/common/src/main/java/org/apache/gravitino/dto/job/SparkJobTemplateDTO.java 
b/common/src/main/java/org/apache/gravitino/dto/job/SparkJobTemplateDTO.java
index 8530196d17..933341c041 100644
--- a/common/src/main/java/org/apache/gravitino/dto/job/SparkJobTemplateDTO.java
+++ b/common/src/main/java/org/apache/gravitino/dto/job/SparkJobTemplateDTO.java
@@ -27,7 +27,12 @@ import lombok.ToString;
 import lombok.experimental.Accessors;
 import lombok.experimental.SuperBuilder;
 
-/** Represents a Spark Job Template Data Transfer Object (DTO). */
+/**
+ * Represents a Spark Job Template Data Transfer Object (DTO).
+ *
+ * <p>Note: The {@code className} field is required for Java/Scala Spark 
applications but is
+ * optional for PySpark applications. For PySpark jobs, this field can be 
{@code null}.
+ */
 @Getter
 @Accessors(fluent = true)
 @EqualsAndHashCode(callSuper = true)
diff --git a/docs/open-api/jobs.yaml b/docs/open-api/jobs.yaml
index 951cda10c5..70d3a80602 100644
--- a/docs/open-api/jobs.yaml
+++ b/docs/open-api/jobs.yaml
@@ -421,7 +421,7 @@ components:
       required:
         - name
         - jobType
-        - mainClass
+        - executable
       properties:
         name:
           type: string
@@ -456,9 +456,10 @@ components:
           default: { }
           additionalProperties:
             type: string
-        mainClass:
+        className:
           type: string
-          description: The main class of the Spark job template
+          description: The main class of the Spark job template. Required for 
Java/Scala Spark applications, optional for PySpark applications
+          nullable: true
         jars:
           type: array
           description: The JAR files of the Spark job template
@@ -600,7 +601,7 @@ components:
           nullable: true
         newClassName:
           type: string
-          description: The new class name of the Spark job template
+          description: The new class name of the Spark job template. Required 
for Java/Scala Spark applications, optional for PySpark applications
           nullable: true
         newJars:
           type: array
@@ -819,7 +820,7 @@ components:
             "executable": 
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/spark-demo.jar",
             "jobType": "spark",
             "name": "test_run_get_spark",
-            "mainClass": "org.apache.spark.examples.SparkPi",
+            "className": "org.apache.spark.examples.SparkPi",
             "jars": [
               
"/var/folders/90/v1d9hxsd6pj8m0jnn6f22tkr0000gn/T/tmpy65fiugc/spark-job.jar"
             ]

Reply via email to