This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi-shaded.git


The following commit(s) were added to refs/heads/master by this push:
     new c00b028  [KYUUBI-SHADED #54] Sync Spark Connect pb from Spark 4.0.0 RC4
c00b028 is described below

commit c00b028d852d122d4d78ccd582a2ad0b3c0048e7
Author: Cheng Pan <[email protected]>
AuthorDate: Mon Apr 14 11:44:58 2025 +0800

    [KYUUBI-SHADED #54] Sync Spark Connect pb from Spark 4.0.0 RC4
    
    ### _Why are the changes needed?_
    
    Sync pb file from [Spark 4.0.0 
RC4](https://github.com/apache/spark/tree/v4.0.0-rc4/sql/connect/common/src/main/protobuf),
 the probability of it changing before the Spark 4.0.0 GA release is tiny; I'm 
going to start releasing kyuubi-shaded 0.5.0 soon
    ```
    $ git log --oneline 8a1f4acead..v4.0.0-rc4 -- 
sql/connect/common/src/main/protobuf/spark/connect
    ```
    ```
    e10676638f7 [SPARK-51650][ML][CONNECT] Support delete ml cached objects in 
batch
    e9e4fcceb04 [SPARK-51326][CONNECT][4.0] Remove LazyExpression proto message
    8c70bc54f25 [SPARK-51142][ML][CONNECT] ML protobufs clean up
    64173e65c6d [SPARK-49308][CONNECT] Support UserDefinedAggregateFunction in 
Spark Connect Scala Client
    b188ca6c9ec [SPARK-50104][CONNECT] Support SparkSession.executeCommand in 
Connect
    7097b3598d8 [SPARK-50133][PYTHON][CONNECT] Support DataFrame conversion to 
table argument in Spark Connect Python Client
    ```
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including 
negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [ ] [Run 
test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests)
 locally before making a pull request
    
    Closes #54 from pan3793/proto-4.0.0-rc4.
    
    d1550f4 [Cheng Pan] Sync Spark Connect pb from Spark 4.0.0 RC4
    
    Authored-by: Cheng Pan <[email protected]>
    Signed-off-by: Cheng Pan <[email protected]>
---
 .../src/main/protobuf/README.md                    |  2 +-
 .../src/main/protobuf/spark/connect/commands.proto | 14 +++++++
 .../main/protobuf/spark/connect/expressions.proto  | 28 ++++++++++----
 .../src/main/protobuf/spark/connect/ml.proto       | 44 +++++++++++-----------
 .../main/protobuf/spark/connect/ml_common.proto    | 26 ++++++++-----
 5 files changed, 75 insertions(+), 39 deletions(-)

diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
index ac23efb..500b4b1 100644
--- a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
@@ -1,5 +1,5 @@
 proto files are copied from
-https://github.com/apache/spark/tree/8a1f4acead0a580142152656913829700b710652/sql/connect/common/src/main/protobuf
+https://github.com/apache/spark/tree/v4.0.0-rc4/sql/connect/common/src/main/protobuf
 
 and with one additional change in each proto file
 ```patch
diff --git 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
index bcd782d..e0fb306 100644
--- 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
+++ 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
@@ -50,6 +50,8 @@ message Command {
     RemoveCachedRemoteRelationCommand remove_cached_remote_relation_command = 
15;
     MergeIntoTableCommand merge_into_table_command = 16;
     MlCommand ml_command = 17;
+    ExecuteExternalCommand execute_external_command = 18;
+
     // This field is used to mark extensions to the protocol. When plugins 
generate arbitrary
     // Commands they can add them here. During the planning the correct 
resolution is done.
     google.protobuf.Any extension = 999;
@@ -535,3 +537,15 @@ message MergeIntoTableCommand {
   // (Required) Whether to enable schema evolution.
   bool with_schema_evolution = 7;
 }
+
+// Execute an arbitrary string command inside an external execution engine
+message ExecuteExternalCommand {
+  // (Required) The class name of the runner that implements 
`ExternalCommandRunner`
+  string runner = 1;
+
+  // (Required) The target command to be executed.
+  string command = 2;
+
+  // (Optional) The options for the runner.
+  map<string, string> options = 3;
+}
diff --git 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
index 7b02a6b..b300c3a 100644
--- 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
+++ 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
@@ -52,8 +52,7 @@ message Expression {
     NamedArgumentExpression named_argument_expression = 17;
     MergeAction merge_action = 19;
     TypedAggregateExpression typed_aggregate_expression = 20;
-    LazyExpression lazy_expression = 21;
-    SubqueryExpression subquery_expression = 22;
+    SubqueryExpression subquery_expression = 21;
 
     // This field is used to mark extensions to the protocol. When plugins 
generate arbitrary
     // relations they can add them here. During the planning the correct 
resolution is done.
@@ -384,6 +383,8 @@ message CommonInlineUserDefinedFunction {
     ScalarScalaUDF scalar_scala_udf = 5;
     JavaUDF java_udf = 6;
   }
+  // (Required) Indicate if this function should be applied on distinct values.
+  bool is_distinct = 7;
 }
 
 message PythonUDF {
@@ -472,21 +473,32 @@ message MergeAction {
   }
 }
 
-message LazyExpression {
-  // (Required) The expression to be marked as lazy.
-  Expression child = 1;
-}
-
 message SubqueryExpression {
-  // (Required) The id of corresponding connect plan.
+  // (Required) The ID of the corresponding connect plan.
   int64 plan_id = 1;
 
   // (Required) The type of the subquery.
   SubqueryType subquery_type = 2;
 
+  // (Optional) Options specific to table arguments.
+  optional TableArgOptions table_arg_options = 3;
+
   enum SubqueryType {
     SUBQUERY_TYPE_UNKNOWN = 0;
     SUBQUERY_TYPE_SCALAR = 1;
     SUBQUERY_TYPE_EXISTS = 2;
+    SUBQUERY_TYPE_TABLE_ARG = 3;
+  }
+
+  // Nested message for table argument options.
+  message TableArgOptions {
+    // (Optional) The way that input rows are partitioned.
+    repeated Expression partition_spec = 1;
+
+    // (Optional) Ordering of rows in a partition.
+    repeated Expression.SortOrder order_spec = 2;
+
+    // (Optional) Whether this is a single partition.
+    optional bool with_single_partition = 3;
   }
 }
diff --git 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto
index 231a7c3..d73101f 100644
--- 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto
+++ 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto
@@ -40,18 +40,18 @@ message MlCommand {
 
   // Command for estimator.fit(dataset)
   message Fit {
-    // Estimator information
+    // (Required) Estimator information (its type should be 
OPERATOR_TYPE_ESTIMATOR)
     MlOperator estimator = 1;
-    // parameters of the Estimator
-    MlParams params = 2;
-    // the training dataset
+    // (Optional) parameters of the Estimator
+    optional MlParams params = 2;
+    // (Required) the training dataset
     Relation dataset = 3;
   }
 
-  // Command to delete the cached object which could be a model
+  // Command to delete the cached objects which could be a model
   // or summary evaluated by a model
   message Delete {
-    ObjectRef obj_ref = 1;
+    repeated ObjectRef obj_refs = 1;
   }
 
   // Command to write ML operator
@@ -63,31 +63,31 @@ message MlCommand {
       // The cached model
       ObjectRef obj_ref = 2;
     }
-    // The parameters of operator which could be estimator/evaluator or a 
cached model
-    MlParams params = 3;
-    // Save the ML instance to the path
+    // (Optional) The parameters of operator which could be 
estimator/evaluator or a cached model
+    optional MlParams params = 3;
+    // (Required) Save the ML instance to the path
     string path = 4;
-    // Overwrites if the output path already exists.
-    bool should_overwrite = 5;
-    // The options of the writer
+    // (Optional) Overwrites if the output path already exists.
+    optional bool should_overwrite = 5;
+    // (Optional) The options of the writer
     map<string, string> options = 6;
   }
 
   // Command to load ML operator.
   message Read {
-    // ML operator information
+    // (Required) ML operator information
     MlOperator operator = 1;
-    // Load the ML instance from the input path
+    // (Required) Load the ML instance from the input path
     string path = 2;
   }
 
   // Command for evaluator.evaluate(dataset)
   message Evaluate {
-    // Evaluator information
+    // (Required) Evaluator information (its type should be 
OPERATOR_TYPE_EVALUATOR)
     MlOperator evaluator = 1;
-    // parameters of the Evaluator
-    MlParams params = 2;
-    // the evaluating dataset
+    // (Optional) parameters of the Evaluator
+    optional MlParams params = 2;
+    // (Required) the evaluating dataset
     Relation dataset = 3;
   }
 }
@@ -111,8 +111,10 @@ message MlCommandResult {
       // Operator name
       string name = 2;
     }
-    string uid = 3;
-    MlParams params = 4;
+    // (Optional) the 'uid' of a ML object
+    // Note it is different from the 'id' of a cached object.
+    optional string uid = 3;
+    // (Optional) parameters
+    optional MlParams params = 4;
   }
-
 }
diff --git 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
index fd10075..9173473 100644
--- 
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
+++ 
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
@@ -33,24 +33,32 @@ message MlParams {
 
 // MLOperator represents the ML operators like (Estimator, Transformer or 
Evaluator)
 message MlOperator {
-  // The qualified name of the ML operator.
+  // (Required) The qualified name of the ML operator.
   string name = 1;
-  // Unique id of the ML operator
+
+  // (Required) Unique id of the ML operator
   string uid = 2;
-  // Represents what the ML operator is
+
+  // (Required) Represents what the ML operator is
   OperatorType type = 3;
+
   enum OperatorType {
-    UNSPECIFIED = 0;
-    ESTIMATOR = 1;
-    TRANSFORMER = 2;
-    EVALUATOR = 3;
-    MODEL = 4;
+    OPERATOR_TYPE_UNSPECIFIED = 0;
+    // ML estimator
+    OPERATOR_TYPE_ESTIMATOR = 1;
+    // ML transformer (non-model)
+    OPERATOR_TYPE_TRANSFORMER = 2;
+    // ML evaluator
+    OPERATOR_TYPE_EVALUATOR = 3;
+    // ML model
+    OPERATOR_TYPE_MODEL = 4;
   }
 }
 
 // Represents a reference to the cached object which could be a model
 // or summary evaluated by a model
 message ObjectRef {
-  // The ID is used to lookup the object on the server side.
+  // (Required) The ID is used to lookup the object on the server side.
+  // Note it is different from the 'uid' of a ML object.
   string id = 1;
 }

Reply via email to