hvanhovell commented on code in PR #48791:
URL: https://github.com/apache/spark/pull/48791#discussion_r1883115341
##########
sql/connect/common/src/main/protobuf/spark/connect/relations.proto:
##########
@@ -98,13 +99,61 @@ message Relation {
// Catalog API (experimental / unstable)
Catalog catalog = 200;
+ // ML relation
+ MlRelation ml_relation = 300;
+
// This field is used to mark extensions to the protocol. When plugins generate arbitrary
// relations they can add them here. During the planning the correct resolution is done.
google.protobuf.Any extension = 998;
Unknown unknown = 999;
}
}
+// Relation to represent ML world
+message MlRelation {
+ oneof ml_type {
+ Transform transform = 1;
+ Fetch fetch = 2;
+ }
+ // Relation to represent transform(input) of the operator
+ // which could be a cached model or a new transformer
+ message Transform {
+ oneof operator {
+ // Object reference
+ ObjectRef obj_ref = 1;
+ // Could be an ML transformer like VectorAssembler
+ MlOperator transformer = 2;
+ }
+ // the input dataframe
+ Relation input = 3;
+ // the operator specific parameters
+ MlParams params = 4;
+ }
+}
+
+// Message for fetching attribute from object on the server side.
+// Fetch can be represented as a Relation or a ML command
+// Command: model.coefficients, model.summary.weightedPrecision which
+// returns the final literal result
+// Relation: model.summary.roc which returns a DataFrame (Relation)
+message Fetch {
+ // (Required) reference to the object on the server side or
+ // the intermediate attribute of the model. eg, "model.summary"
+ ObjectRef obj_ref = 1;
+ // (Required) the method name. Eg, "coefficients" of the model
+ // and "weightedPrecision" of the "model.summary"
+ string method = 2;
+ // (Optional) the arguments of the method
+ repeated Args args = 3;
Review Comment:
Can you tell me which methods take arguments? I would prefer that those
get dedicated RPCs or, alternatively, that we expose all of this through
specialized expressions.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]