This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi-shaded.git
The following commit(s) were added to refs/heads/master by this push:
new c00b028 [KYUUBI-SHADED #54] Sync Spark Connect pb from Spark 4.0.0 RC4
c00b028 is described below
commit c00b028d852d122d4d78ccd582a2ad0b3c0048e7
Author: Cheng Pan <[email protected]>
AuthorDate: Mon Apr 14 11:44:58 2025 +0800
[KYUUBI-SHADED #54] Sync Spark Connect pb from Spark 4.0.0 RC4
### _Why are the changes needed?_
Sync pb file from [Spark 4.0.0
RC4](https://github.com/apache/spark/tree/v4.0.0-rc4/sql/connect/common/src/main/protobuf),
the probability of changes before the Spark 4.0.0 GA release is tiny; I'm
going to start releasing kyuubi-shaded 0.5.0 soon
```
$ git log --oneline 8a1f4acead..v4.0.0-rc4 --
sql/connect/common/src/main/protobuf/spark/connect
```
```
e10676638f7 [SPARK-51650][ML][CONNECT] Support delete ml cached objects in
batch
e9e4fcceb04 [SPARK-51326][CONNECT][4.0] Remove LazyExpression proto message
8c70bc54f25 [SPARK-51142][ML][CONNECT] ML protobufs clean up
64173e65c6d [SPARK-49308][CONNECT] Support UserDefinedAggregateFunction in
Spark Connect Scala Client
b188ca6c9ec [SPARK-50104][CONNECT] Support SparkSession.executeCommand in
Connect
7097b3598d8 [SPARK-50133][PYTHON][CONNECT] Support DataFrame conversion to
table argument in Spark Connect Python Client
```
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run
test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests)
locally before making a pull request
Closes #54 from pan3793/proto-4.0.0-rc4.
d1550f4 [Cheng Pan] Sync Spark Connect pb from Spark 4.0.0 RC4
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
---
.../src/main/protobuf/README.md | 2 +-
.../src/main/protobuf/spark/connect/commands.proto | 14 +++++++
.../main/protobuf/spark/connect/expressions.proto | 28 ++++++++++----
.../src/main/protobuf/spark/connect/ml.proto | 44 +++++++++++-----------
.../main/protobuf/spark/connect/ml_common.proto | 26 ++++++++-----
5 files changed, 75 insertions(+), 39 deletions(-)
diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
index ac23efb..500b4b1 100644
--- a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
@@ -1,5 +1,5 @@
proto files are copied from
-https://github.com/apache/spark/tree/8a1f4acead0a580142152656913829700b710652/sql/connect/common/src/main/protobuf
+https://github.com/apache/spark/tree/v4.0.0-rc4/sql/connect/common/src/main/protobuf
and with one additional change in each proto file
```patch
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
index bcd782d..e0fb306 100644
---
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
@@ -50,6 +50,8 @@ message Command {
RemoveCachedRemoteRelationCommand remove_cached_remote_relation_command =
15;
MergeIntoTableCommand merge_into_table_command = 16;
MlCommand ml_command = 17;
+ ExecuteExternalCommand execute_external_command = 18;
+
// This field is used to mark extensions to the protocol. When plugins
generate arbitrary
// Commands they can add them here. During the planning the correct
resolution is done.
google.protobuf.Any extension = 999;
@@ -535,3 +537,15 @@ message MergeIntoTableCommand {
// (Required) Whether to enable schema evolution.
bool with_schema_evolution = 7;
}
+
+// Execute an arbitrary string command inside an external execution engine
+message ExecuteExternalCommand {
+ // (Required) The class name of the runner that implements
`ExternalCommandRunner`
+ string runner = 1;
+
+ // (Required) The target command to be executed.
+ string command = 2;
+
+ // (Optional) The options for the runner.
+ map<string, string> options = 3;
+}
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
index 7b02a6b..b300c3a 100644
---
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
@@ -52,8 +52,7 @@ message Expression {
NamedArgumentExpression named_argument_expression = 17;
MergeAction merge_action = 19;
TypedAggregateExpression typed_aggregate_expression = 20;
- LazyExpression lazy_expression = 21;
- SubqueryExpression subquery_expression = 22;
+ SubqueryExpression subquery_expression = 21;
// This field is used to mark extensions to the protocol. When plugins
generate arbitrary
// relations they can add them here. During the planning the correct
resolution is done.
@@ -384,6 +383,8 @@ message CommonInlineUserDefinedFunction {
ScalarScalaUDF scalar_scala_udf = 5;
JavaUDF java_udf = 6;
}
+ // (Required) Indicate if this function should be applied on distinct values.
+ bool is_distinct = 7;
}
message PythonUDF {
@@ -472,21 +473,32 @@ message MergeAction {
}
}
-message LazyExpression {
- // (Required) The expression to be marked as lazy.
- Expression child = 1;
-}
-
message SubqueryExpression {
- // (Required) The id of corresponding connect plan.
+ // (Required) The ID of the corresponding connect plan.
int64 plan_id = 1;
// (Required) The type of the subquery.
SubqueryType subquery_type = 2;
+ // (Optional) Options specific to table arguments.
+ optional TableArgOptions table_arg_options = 3;
+
enum SubqueryType {
SUBQUERY_TYPE_UNKNOWN = 0;
SUBQUERY_TYPE_SCALAR = 1;
SUBQUERY_TYPE_EXISTS = 2;
+ SUBQUERY_TYPE_TABLE_ARG = 3;
+ }
+
+ // Nested message for table argument options.
+ message TableArgOptions {
+ // (Optional) The way that input rows are partitioned.
+ repeated Expression partition_spec = 1;
+
+ // (Optional) Ordering of rows in a partition.
+ repeated Expression.SortOrder order_spec = 2;
+
+ // (Optional) Whether this is a single partition.
+ optional bool with_single_partition = 3;
}
}
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto
index 231a7c3..d73101f 100644
---
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml.proto
@@ -40,18 +40,18 @@ message MlCommand {
// Command for estimator.fit(dataset)
message Fit {
- // Estimator information
+ // (Required) Estimator information (its type should be
OPERATOR_TYPE_ESTIMATOR)
MlOperator estimator = 1;
- // parameters of the Estimator
- MlParams params = 2;
- // the training dataset
+ // (Optional) parameters of the Estimator
+ optional MlParams params = 2;
+ // (Required) the training dataset
Relation dataset = 3;
}
- // Command to delete the cached object which could be a model
+ // Command to delete the cached objects which could be a model
// or summary evaluated by a model
message Delete {
- ObjectRef obj_ref = 1;
+ repeated ObjectRef obj_refs = 1;
}
// Command to write ML operator
@@ -63,31 +63,31 @@ message MlCommand {
// The cached model
ObjectRef obj_ref = 2;
}
- // The parameters of operator which could be estimator/evaluator or a
cached model
- MlParams params = 3;
- // Save the ML instance to the path
+ // (Optional) The parameters of operator which could be
estimator/evaluator or a cached model
+ optional MlParams params = 3;
+ // (Required) Save the ML instance to the path
string path = 4;
- // Overwrites if the output path already exists.
- bool should_overwrite = 5;
- // The options of the writer
+ // (Optional) Overwrites if the output path already exists.
+ optional bool should_overwrite = 5;
+ // (Optional) The options of the writer
map<string, string> options = 6;
}
// Command to load ML operator.
message Read {
- // ML operator information
+ // (Required) ML operator information
MlOperator operator = 1;
- // Load the ML instance from the input path
+ // (Required) Load the ML instance from the input path
string path = 2;
}
// Command for evaluator.evaluate(dataset)
message Evaluate {
- // Evaluator information
+ // (Required) Evaluator information (its type should be
OPERATOR_TYPE_EVALUATOR)
MlOperator evaluator = 1;
- // parameters of the Evaluator
- MlParams params = 2;
- // the evaluating dataset
+ // (Optional) parameters of the Evaluator
+ optional MlParams params = 2;
+ // (Required) the evaluating dataset
Relation dataset = 3;
}
}
@@ -111,8 +111,10 @@ message MlCommandResult {
// Operator name
string name = 2;
}
- string uid = 3;
- MlParams params = 4;
+ // (Optional) the 'uid' of a ML object
+ // Note it is different from the 'id' of a cached object.
+ optional string uid = 3;
+ // (Optional) parameters
+ optional MlParams params = 4;
}
-
}
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
index fd10075..9173473 100644
---
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/ml_common.proto
@@ -33,24 +33,32 @@ message MlParams {
// MLOperator represents the ML operators like (Estimator, Transformer or
Evaluator)
message MlOperator {
- // The qualified name of the ML operator.
+ // (Required) The qualified name of the ML operator.
string name = 1;
- // Unique id of the ML operator
+
+ // (Required) Unique id of the ML operator
string uid = 2;
- // Represents what the ML operator is
+
+ // (Required) Represents what the ML operator is
OperatorType type = 3;
+
enum OperatorType {
- UNSPECIFIED = 0;
- ESTIMATOR = 1;
- TRANSFORMER = 2;
- EVALUATOR = 3;
- MODEL = 4;
+ OPERATOR_TYPE_UNSPECIFIED = 0;
+ // ML estimator
+ OPERATOR_TYPE_ESTIMATOR = 1;
+ // ML transformer (non-model)
+ OPERATOR_TYPE_TRANSFORMER = 2;
+ // ML evaluator
+ OPERATOR_TYPE_EVALUATOR = 3;
+ // ML model
+ OPERATOR_TYPE_MODEL = 4;
}
}
// Represents a reference to the cached object which could be a model
// or summary evaluated by a model
message ObjectRef {
- // The ID is used to lookup the object on the server side.
+ // (Required) The ID is used to lookup the object on the server side.
+ // Note it is different from the 'uid' of a ML object.
string id = 1;
}