zhengruifeng commented on code in PR #38742:
URL: https://github.com/apache/spark/pull/38742#discussion_r1029953393
##########
connector/connect/src/main/protobuf/spark/connect/base.proto:
##########
@@ -100,18 +70,138 @@ message AnalyzePlanRequest {
// logging purposes and will not be interpreted by the server.
optional string client_type = 4;
- // (Optional) Get the explain string of the plan.
- Explain explain = 5;
+ repeated AnalysisTask tasks = 5;
+
+ message AnalysisTask {
+ oneof task {
+ // Get the schema
+ Schema schema = 1;
+
+ // Is local
+ IsLocal is_local = 2;
+
+ // Is Streaming
+ IsStreaming is_streaming = 3;
+
+ // Get the explain string of the plan.
+ Explain explain = 4;
+
+ // Get the tree string of the schema.
+ TreeString tree_string = 5;
+
+ // Get the input files.
+ InputFiles input_files = 6;
+
+ // Get the semantic hash
+ SemanticHash semantic_hash = 7;
+
+ // Check whether plans are equal.
+ SameSemantics same_semantics = 8;
+ }
+ }
+
+ // Analyze the input plan and return the schema.
+ message Schema { }
+
+ // Returns true if the `collect` and `take` methods can be run locally.
+ message IsLocal { }
+
+ // Returns true if this Dataset contains one or more sources that continuously
+ // return data as it arrives.
+ message IsStreaming { }
+
+ // Explains the input plan based on a configurable mode.
+ message Explain {
+ // Plan explanation mode.
+ enum ExplainMode {
+ MODE_UNSPECIFIED = 0;
+
+ // Generates only physical plan.
+ SIMPLE = 1;
+
+ // Generates parsed logical plan, analyzed logical plan, optimized logical plan and physical plan.
+ // Parsed Logical plan is a unresolved plan that extracted from the query. Analyzed logical plans
+ // transforms which translates unresolvedAttribute and unresolvedRelation into fully typed objects.
+ // The optimized logical plan transforms through a set of optimization rules, resulting in the
+ // physical plan.
+ EXTENDED = 2;
+
+ // Generates code for the statement, if any and a physical plan.
+ CODEGEN = 3;
+
+ // If plan node statistics are available, generates a logical plan and also the statistics.
+ COST = 4;
+
+ // Generates a physical plan outline and also node details.
+ FORMATTED = 5;
+ }
+
+ // (Required) For analyzePlan rpc calls, configure the mode to explain plan in strings.
+ ExplainMode explain_mode= 1;
+ }
+
+ // Generate a string to express the schema in a nice tree format.
+ // It will invoke 'StructType.treeString' (same as 'Dataset.printSchema')
+ // to compute the results.
+ message TreeString {
+
+ // (Optional) The level to generate the string.
+ optional int32 level = 1;
+ }
+
+ // Returns a best-effort snapshot of the files that compose this Dataset.
+ // It will invoke 'Dataset.inputFiles' to compute the results.
+ message InputFiles { }
+
+ // Returns a `hashCode` of the logical query plan.
+ // It will invoke 'Dataset.semanticHash' to compute the results.
+ message SemanticHash { }
+
+ // Returns `true` when the logical query plans inside both Datasets are equal.
+ // It will invoke 'Dataset.sameSemantics' to compute the results.
+ message SameSemantics {
+ Relation other = 1;
+ }
}
// Response to performing analysis of the query. Contains relevant metadata to be able to
// reason about the performance.
message AnalyzePlanResponse {
string client_id = 1;
- DataType schema = 2;
- // The extended explain string as produced by Spark.
- string explain_string = 3;
+ repeated AnalysisResult results = 2;
+
+ message AnalysisResult {
+ oneof result {
+ // The analyzed schema.
+ DataType schema = 1;
+
+ // Is local
+ bool is_local = 2;
+
+ // Is Streaming
+ bool is_streaming = 3;
+
+ // The extended explain string as produced by Spark.
+ string explain_string = 4;
+
+ // Get the tree string of the schema.
+ string tree_string = 5;
+
+ // Get the input files.
+ InputFiles input_files = 6;
+
+ // Get the semantic hash code.
+ int32 semantic_hash = 7;
Review Comment:
printSchema is frequently used, but I also added the others by the way
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]