This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi-shaded.git
The following commit(s) were added to refs/heads/master by this push:
new c58c858 [KYUUBI-SHADED #49] Create relocated package for spark-connect-rpc
c58c858 is described below
commit c58c858b358b4fb90b06fff3b8147d2592d48c23
Author: Cheng Pan <[email protected]>
AuthorDate: Mon Aug 19 15:36:04 2024 +0800
[KYUUBI-SHADED #49] Create relocated package for spark-connect-rpc
### _Why are the changes needed?_
This PR introduces a new module, `kyuubi-relocated-spark-connect-rpc`, which forks the [Spark Connect proto
files](https://github.com/apache/spark/tree/5b2d2149b615acdd8730547a1f24c2b637222545/sql/connect/common/src/main/protobuf)
and shifts the Java package name from `org.apache.spark.connect.proto` to
`org.apache.kyuubi.shaded.spark.connect.proto`. This is a precondition for Kyuubi to support Spark Connect.
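For illustration only, a minimal sketch of what downstream code looks like once the relocation is in place. The class and setter names are the ones protoc derives from these proto files under the relocated `java_package`; the user values are placeholders:

```java
// Client code compiles against the relocated package only, never org.apache.spark.connect.proto.
import org.apache.kyuubi.shaded.spark.connect.proto.UserContext;

public class RelocationExample {
  public static void main(String[] args) {
    UserContext ctx = UserContext.newBuilder()
        .setUserId("kyuubi")       // placeholder user id
        .setUserName("kyuubi")     // placeholder user name
        .build();
    System.out.println(ctx);
  }
}
```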
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly, including negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #49 from pan3793/spark-connect-rpc.
316d46f [Cheng Pan] fi
914e1ba [Cheng Pan] Add kyuubi-relocated-spark-connect-rpc
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
---
kyuubi-relocated-spark-connect-rpc/pom.xml | 110 ++
.../src/main/protobuf/README.md | 8 +
.../src/main/protobuf/spark/connect/base.proto | 1103 +++++++++++++++++++
.../src/main/protobuf/spark/connect/catalog.proto | 243 +++++
.../src/main/protobuf/spark/connect/commands.proto | 533 ++++++++++
.../src/main/protobuf/spark/connect/common.proto | 98 ++
.../protobuf/spark/connect/example_plugins.proto | 42 +
.../main/protobuf/spark/connect/expressions.proto | 453 ++++++++
.../main/protobuf/spark/connect/relations.proto | 1120 ++++++++++++++++++++
.../src/main/protobuf/spark/connect/types.proto | 201 ++++
.../src/main/resources/META-INF/NOTICE | 8 +
pom.xml | 9 +
12 files changed, 3928 insertions(+)
diff --git a/kyuubi-relocated-spark-connect-rpc/pom.xml b/kyuubi-relocated-spark-connect-rpc/pom.xml
new file mode 100644
index 0000000..9ade5f4
--- /dev/null
+++ b/kyuubi-relocated-spark-connect-rpc/pom.xml
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.kyuubi</groupId>
+ <artifactId>kyuubi-relocated-parent</artifactId>
+ <version>0.5.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>kyuubi-relocated-spark-connect-rpc</artifactId>
+  <description>Relocated Spark Connect RPC classes used by Kyuubi internally.</description>
+
+ <properties>
+ <grpc.version>1.65.1</grpc.version>
+ <protobuf.version>3.25.1</protobuf.version>
+ </properties>
+
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-bom</artifactId>
+ <version>${grpc.version}</version>
+ <type>pom</type>
+ <scope>import</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-bom</artifactId>
+ <version>${protobuf.version}</version>
+ <type>pom</type>
+ <scope>import</scope>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
+ <dependencies>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-protobuf</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-stub</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+      <!-- Add protobuf-maven-plugin and use protoc-gen-grpc-java as the code generation plugin -->
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <version>0.6.1</version>
+ <configuration>
+          <protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
+          <pluginId>grpc-java</pluginId>
+          <pluginArtifact>io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier}</pluginArtifact>
+ <protoSourceRoot>src/main/protobuf</protoSourceRoot>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>compile</goal>
+ <goal>compile-custom</goal>
+ <goal>test-compile</goal>
+ </goals>
+ <configuration>
+ <pluginParameter>@generated=omit</pluginParameter>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Used to resolve variables in the 'version' tag -->
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>flatten-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
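The plugin configuration above runs protoc together with protoc-gen-grpc-java, so in addition to the message classes a `SparkConnectServiceGrpc` stub class is generated under the relocated package. A hedged sketch of bootstrapping a client with grpc-java; the host, port, and plaintext transport are assumptions of this sketch, not part of this commit:

```java
import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import org.apache.kyuubi.shaded.spark.connect.proto.SparkConnectServiceGrpc;

public class ConnectClientBootstrap {
  public static void main(String[] args) {
    // 15002 is Spark Connect's conventional default port; adjust for your deployment.
    ManagedChannel channel = ManagedChannelBuilder.forAddress("localhost", 15002)
        .usePlaintext()
        .build();

    // Blocking stub for unary and server-streaming RPCs (AnalyzePlan, ExecutePlan, Config, ...).
    SparkConnectServiceGrpc.SparkConnectServiceBlockingStub blockingStub =
        SparkConnectServiceGrpc.newBlockingStub(channel);

    // Async stub is needed for client-streaming RPCs such as AddArtifacts.
    SparkConnectServiceGrpc.SparkConnectServiceStub asyncStub =
        SparkConnectServiceGrpc.newStub(channel);

    channel.shutdown();
  }
}
```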
diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
new file mode 100644
index 0000000..360e547
--- /dev/null
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/README.md
@@ -0,0 +1,8 @@
+proto files are copied from
+https://github.com/apache/spark/tree/5b2d2149b615acdd8730547a1f24c2b637222545/sql/connect/common/src/main/protobuf
+
+and with one additional change applied to each proto file:
+```patch
+- option java_package = "org.apache.spark.connect.proto"
++ option java_package = "org.apache.kyuubi.shaded.spark.connect.proto"
+```
diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/base.proto b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/base.proto
new file mode 100644
index 0000000..0c3498a
--- /dev/null
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/base.proto
@@ -0,0 +1,1103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+import "google/protobuf/any.proto";
+import "spark/connect/commands.proto";
+import "spark/connect/common.proto";
+import "spark/connect/expressions.proto";
+import "spark/connect/relations.proto";
+import "spark/connect/types.proto";
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+option go_package = "internal/generated";
+
+// A [[Plan]] is the structure that carries the runtime information for the execution from the
+// client to the server. A [[Plan]] can either be of the type [[Relation]] which is a reference
+// to the underlying logical plan or it can be of the [[Command]] type that is used to execute
+// commands on the server.
+message Plan {
+ oneof op_type {
+ Relation root = 1;
+ Command command = 2;
+ }
+}
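As the comment above describes, a `Plan` is just a oneof over a `Relation` or a `Command`. A small sketch of wrapping an already-built relation or command; the `relation` and `command` values are assumed to come from elsewhere:

```java
import org.apache.kyuubi.shaded.spark.connect.proto.Command;
import org.apache.kyuubi.shaded.spark.connect.proto.Plan;
import org.apache.kyuubi.shaded.spark.connect.proto.Relation;

final class Plans {
  // A query plan: the root relation is what the server executes and returns results for.
  static Plan ofRelation(Relation relation) {
    return Plan.newBuilder().setRoot(relation).build();
  }

  // A command plan: executed for its side effects rather than a relational result.
  static Plan ofCommand(Command command) {
    return Plan.newBuilder().setCommand(command).build();
  }
}
```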
+
+
+
+// User Context is used to refer to one particular user session that is executing
+// queries in the backend.
+message UserContext {
+ string user_id = 1;
+ string user_name = 2;
+
+  // To extend the existing user context message that is used to identify incoming requests,
+  // Spark Connect leverages the Any protobuf type that can be used to inject arbitrary other
+  // messages into this message. Extensions are stored as a `repeated` type to be able to
+  // handle multiple active extensions.
+ repeated google.protobuf.Any extensions = 999;
+}
+
+// Request to perform plan analyze, optionally to explain the plan.
+message AnalyzePlanRequest {
+ // (Required)
+ //
+  // The session_id specifies a spark session for a user id (which is specified
+  // by user_context.user_id). The session_id is set by the client to be able to
+  // collate streaming responses from different queries within the dedicated session.
+  // The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 17;
+
+ // (Required) User context
+ UserContext user_context = 2;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 3;
+
+ oneof analyze {
+ Schema schema = 4;
+ Explain explain = 5;
+ TreeString tree_string = 6;
+ IsLocal is_local = 7;
+ IsStreaming is_streaming = 8;
+ InputFiles input_files = 9;
+ SparkVersion spark_version = 10;
+ DDLParse ddl_parse = 11;
+ SameSemantics same_semantics = 12;
+ SemanticHash semantic_hash = 13;
+ Persist persist = 14;
+ Unpersist unpersist = 15;
+ GetStorageLevel get_storage_level = 16;
+ }
+
+ message Schema {
+ // (Required) The logical plan to be analyzed.
+ Plan plan = 1;
+ }
+
+ // Explains the input plan based on a configurable mode.
+ message Explain {
+ // (Required) The logical plan to be analyzed.
+ Plan plan = 1;
+
+    // (Required) For analyzePlan rpc calls, configure the mode to explain plan in strings.
+    ExplainMode explain_mode = 2;
+
+ // Plan explanation mode.
+ enum ExplainMode {
+ EXPLAIN_MODE_UNSPECIFIED = 0;
+
+ // Generates only physical plan.
+ EXPLAIN_MODE_SIMPLE = 1;
+
+      // Generates the parsed logical plan, analyzed logical plan, optimized logical plan and physical plan.
+      // The parsed logical plan is an unresolved plan extracted from the query. The analyzed logical plan
+      // resolves unresolvedAttribute and unresolvedRelation into fully typed objects.
+      // The optimized logical plan is transformed through a set of optimization rules, resulting in the
+      // physical plan.
+      EXPLAIN_MODE_EXTENDED = 2;
+
+      // Generates code for the statement, if any, and a physical plan.
+      EXPLAIN_MODE_CODEGEN = 3;
+
+      // If plan node statistics are available, generates a logical plan and also the statistics.
+ EXPLAIN_MODE_COST = 4;
+
+ // Generates a physical plan outline and also node details.
+ EXPLAIN_MODE_FORMATTED = 5;
+ }
+ }
+
+ message TreeString {
+ // (Required) The logical plan to be analyzed.
+ Plan plan = 1;
+
+ // (Optional) Max level of the schema.
+ optional int32 level = 2;
+ }
+
+ message IsLocal {
+ // (Required) The logical plan to be analyzed.
+ Plan plan = 1;
+ }
+
+ message IsStreaming {
+ // (Required) The logical plan to be analyzed.
+ Plan plan = 1;
+ }
+
+ message InputFiles {
+ // (Required) The logical plan to be analyzed.
+ Plan plan = 1;
+ }
+
+ message SparkVersion { }
+
+ message DDLParse {
+ // (Required) The DDL formatted string to be parsed.
+ string ddl_string = 1;
+ }
+
+
+  // Returns `true` when the logical query plans are equal and therefore return the same results.
+ message SameSemantics {
+ // (Required) The plan to be compared.
+ Plan target_plan = 1;
+
+ // (Required) The other plan to be compared.
+ Plan other_plan = 2;
+ }
+
+ message SemanticHash {
+ // (Required) The logical plan to get a hashCode.
+ Plan plan = 1;
+ }
+
+ message Persist {
+ // (Required) The logical plan to persist.
+ Relation relation = 1;
+
+ // (Optional) The storage level.
+ optional StorageLevel storage_level = 2;
+ }
+
+ message Unpersist {
+ // (Required) The logical plan to unpersist.
+ Relation relation = 1;
+
+ // (Optional) Whether to block until all blocks are deleted.
+ optional bool blocking = 2;
+ }
+
+ message GetStorageLevel {
+ // (Required) The logical plan to get the storage level.
+ Relation relation = 1;
+ }
+}
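For illustration, a hedged sketch of building one of these analyze requests, here asking for an extended explain of a plan. The session id and user context values are placeholders:

```java
import org.apache.kyuubi.shaded.spark.connect.proto.AnalyzePlanRequest;
import org.apache.kyuubi.shaded.spark.connect.proto.Plan;
import org.apache.kyuubi.shaded.spark.connect.proto.UserContext;

final class AnalyzeRequests {
  static AnalyzePlanRequest explainExtended(String sessionId, UserContext userContext, Plan plan) {
    return AnalyzePlanRequest.newBuilder()
        .setSessionId(sessionId)            // client-generated UUID string
        .setUserContext(userContext)
        .setExplain(AnalyzePlanRequest.Explain.newBuilder()
            .setPlan(plan)
            .setExplainMode(AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_EXTENDED))
        .build();
  }
}
```

The response arrives as `AnalyzePlanResponse.Explain` with the explain output rendered as a string.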
+
+// Response to performing analysis of the query. Contains relevant metadata to be able to
+// reason about the performance.
+// Next ID: 16
+message AnalyzePlanResponse {
+ string session_id = 1;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 15;
+
+ oneof result {
+ Schema schema = 2;
+ Explain explain = 3;
+ TreeString tree_string = 4;
+ IsLocal is_local = 5;
+ IsStreaming is_streaming = 6;
+ InputFiles input_files = 7;
+ SparkVersion spark_version = 8;
+ DDLParse ddl_parse = 9;
+ SameSemantics same_semantics = 10;
+ SemanticHash semantic_hash = 11;
+ Persist persist = 12;
+ Unpersist unpersist = 13;
+ GetStorageLevel get_storage_level = 14;
+ }
+
+ message Schema {
+ DataType schema = 1;
+ }
+
+ message Explain {
+ string explain_string = 1;
+ }
+
+ message TreeString {
+ string tree_string = 1;
+ }
+
+ message IsLocal {
+ bool is_local = 1;
+ }
+
+ message IsStreaming {
+ bool is_streaming = 1;
+ }
+
+ message InputFiles {
+ // A best-effort snapshot of the files that compose this Dataset
+ repeated string files = 1;
+ }
+
+ message SparkVersion {
+ string version = 1;
+ }
+
+ message DDLParse {
+ DataType parsed = 1;
+ }
+
+ message SameSemantics {
+ bool result = 1;
+ }
+
+ message SemanticHash {
+ int32 result = 1;
+ }
+
+ message Persist { }
+
+ message Unpersist { }
+
+ message GetStorageLevel {
+ // (Required) The StorageLevel as a result of get_storage_level request.
+ StorageLevel storage_level = 1;
+ }
+}
+
+// A request to be executed by the service.
+message ExecutePlanRequest {
+ // (Required)
+ //
+  // The session_id specifies a spark session for a user id (which is specified
+  // by user_context.user_id). The session_id is set by the client to be able to
+  // collate streaming responses from different queries within the dedicated session.
+  // The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 8;
+
+ // (Required) User context
+ //
+  // user_context.user_id and session_id both identify a unique remote spark session on the
+  // server side.
+ UserContext user_context = 2;
+
+ // (Optional)
+  // Provide an id for this request. If not provided, it will be generated by the server.
+  // It is returned in every ExecutePlanResponse.operation_id of the ExecutePlan response stream.
+  // The id must be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  optional string operation_id = 6;
+
+ // (Required) The logical plan to be executed / analyzed.
+ Plan plan = 3;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 4;
+
+ // Repeated element for options that can be passed to the request. This
element is currently
+ // unused but allows to pass in an extension value used for arbitrary
options.
+ repeated RequestOption request_options = 5;
+
+ message RequestOption {
+ oneof request_option {
+ ReattachOptions reattach_options = 1;
+ // Extension type for request options
+ google.protobuf.Any extension = 999;
+ }
+ }
+
+ // Tags to tag the given execution with.
+ // Tags cannot contain ',' character and cannot be empty strings.
+ // Used by Interrupt with interrupt.tag.
+ repeated string tags = 7;
+}
+
+// The response of a query, can be one or more for each request. Responses
belonging to the
+// same input query, carry the same `session_id`.
+// Next ID: 17
+message ExecutePlanResponse {
+ string session_id = 1;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 15;
+
+  // Identifies the ExecutePlan execution.
+  // If set by the client in ExecutePlanRequest.operationId, that value is returned.
+  // Otherwise generated by the server.
+  // It is a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string operation_id = 12;
+
+  // Identifies the response in the stream.
+  // The id is a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string response_id = 13;
+
+ // Union type for the different response messages.
+ oneof response_type {
+ ArrowBatch arrow_batch = 2;
+
+ // Special case for executing SQL commands.
+ SqlCommandResult sql_command_result = 5;
+
+ // Response for a streaming query.
+ WriteStreamOperationStartResult write_stream_operation_start_result = 8;
+
+ // Response for commands on a streaming query.
+ StreamingQueryCommandResult streaming_query_command_result = 9;
+
+ // Response for 'SparkContext.resources'.
+ GetResourcesCommandResult get_resources_command_result = 10;
+
+ // Response for commands on the streaming query manager.
+ StreamingQueryManagerCommandResult streaming_query_manager_command_result
= 11;
+
+ // Response for commands on the client side streaming query listener.
+ StreamingQueryListenerEventsResult streaming_query_listener_events_result
= 16;
+
+ // Response type informing if the stream is complete in reattachable
execution.
+ ResultComplete result_complete = 14;
+
+ // Response for command that creates ResourceProfile.
+ CreateResourceProfileCommandResult create_resource_profile_command_result
= 17;
+
+ // (Optional) Intermediate query progress reports.
+ ExecutionProgress execution_progress = 18;
+
+ // Response for command that checkpoints a DataFrame.
+ CheckpointCommandResult checkpoint_command_result = 19;
+
+ // Support arbitrary result objects.
+ google.protobuf.Any extension = 999;
+ }
+
+  // Metrics for the query execution. Typically, this field is only present in the last
+  // batch of results and then represents the overall state of the query execution.
+  Metrics metrics = 4;
+
+ // The metrics observed during the execution of the query plan.
+ repeated ObservedMetrics observed_metrics = 6;
+
+  // (Optional) The Spark schema. This field is available when `collect` is called.
+  DataType schema = 7;
+
+ // A SQL command returns an opaque Relation that can be directly used as
input for the next
+ // call.
+ message SqlCommandResult {
+ Relation relation = 1;
+ }
+
+ // Batch results of metrics.
+ message ArrowBatch {
+ // Count rows in `data`. Must match the number of rows inside `data`.
+ int64 row_count = 1;
+ // Serialized Arrow data.
+ bytes data = 2;
+
+ // If set, row offset of the start of this ArrowBatch in execution results.
+ optional int64 start_offset = 3;
+ }
+
+ message Metrics {
+
+ repeated MetricObject metrics = 1;
+
+ message MetricObject {
+ string name = 1;
+ int64 plan_id = 2;
+ int64 parent = 3;
+ map<string, MetricValue> execution_metrics = 4;
+ }
+
+ message MetricValue {
+ string name = 1;
+ int64 value = 2;
+ string metric_type = 3;
+ }
+ }
+
+ message ObservedMetrics {
+ string name = 1;
+ repeated Expression.Literal values = 2;
+ repeated string keys = 3;
+ int64 plan_id = 4;
+ }
+
+ message ResultComplete {
+    // If present, in a reattachable execution this means that after server sends onComplete,
+    // the execution is complete. If the server sends onComplete without sending a ResultComplete,
+    // it means that there is more, and the client should use ReattachExecute RPC to continue.
+ }
+
+ // This message is used to communicate progress about the query progress
during the execution.
+ message ExecutionProgress {
+ // Captures the progress of each individual stage.
+ repeated StageInfo stages = 1;
+
+ // Captures the currently in progress tasks.
+ int64 num_inflight_tasks = 2;
+
+ message StageInfo {
+ int64 stage_id = 1;
+ int64 num_tasks = 2;
+ int64 num_completed_tasks = 3;
+ int64 input_bytes_read = 4;
+ bool done = 5;
+ }
+ }
+}
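Putting ExecutePlanRequest and ExecutePlanResponse together, a minimal blocking-stub sketch that runs a plan and walks the streamed responses, reading Arrow batches until ResultComplete or the end of the stream. The stub and plan are assumed to be built as in the earlier sketches; decoding the Arrow payload itself is out of scope here:

```java
import java.util.Iterator;
import java.util.UUID;
import org.apache.kyuubi.shaded.spark.connect.proto.ExecutePlanRequest;
import org.apache.kyuubi.shaded.spark.connect.proto.ExecutePlanResponse;
import org.apache.kyuubi.shaded.spark.connect.proto.Plan;
import org.apache.kyuubi.shaded.spark.connect.proto.SparkConnectServiceGrpc;
import org.apache.kyuubi.shaded.spark.connect.proto.UserContext;

final class ExecuteExample {
  static void execute(SparkConnectServiceGrpc.SparkConnectServiceBlockingStub stub,
                      String sessionId, UserContext userContext, Plan plan) {
    ExecutePlanRequest request = ExecutePlanRequest.newBuilder()
        .setSessionId(sessionId)
        .setUserContext(userContext)
        .setOperationId(UUID.randomUUID().toString()) // optional; echoed back in every response
        .addTags("kyuubi-example")                    // optional; usable later with Interrupt
        .setPlan(plan)
        .build();

    Iterator<ExecutePlanResponse> responses = stub.executePlan(request);
    while (responses.hasNext()) {
      ExecutePlanResponse response = responses.next();
      if (response.hasArrowBatch()) {
        // Arrow IPC payload; hand response.getArrowBatch().getData() to an Arrow reader.
        System.out.println("rows in batch: " + response.getArrowBatch().getRowCount());
      } else if (response.hasResultComplete()) {
        break; // reattachable executions signal completion explicitly
      }
    }
  }
}
```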
+
+// The key-value pair for the config request and response.
+message KeyValue {
+ // (Required) The key.
+ string key = 1;
+ // (Optional) The value.
+ optional string value = 2;
+}
+
+// Request to update or fetch the configurations.
+message ConfigRequest {
+ // (Required)
+ //
+  // The session_id specifies a spark session for a user id (which is specified
+  // by user_context.user_id). The session_id is set by the client to be able to
+  // collate streaming responses from different queries within the dedicated session.
+  // The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 8;
+
+ // (Required) User context
+ UserContext user_context = 2;
+
+ // (Required) The operation for the config.
+ Operation operation = 3;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 4;
+
+ message Operation {
+ oneof op_type {
+ Set set = 1;
+ Get get = 2;
+ GetWithDefault get_with_default = 3;
+ GetOption get_option = 4;
+ GetAll get_all = 5;
+ Unset unset = 6;
+ IsModifiable is_modifiable = 7;
+ }
+ }
+
+ message Set {
+ // (Required) The config key-value pairs to set.
+ repeated KeyValue pairs = 1;
+ }
+
+ message Get {
+ // (Required) The config keys to get.
+ repeated string keys = 1;
+ }
+
+ message GetWithDefault {
+    // (Required) The config key-value pairs to get. The value will be used as the default value.
+    repeated KeyValue pairs = 1;
+ }
+
+ message GetOption {
+ // (Required) The config keys to get optionally.
+ repeated string keys = 1;
+ }
+
+ message GetAll {
+ // (Optional) The prefix of the config key to get.
+ optional string prefix = 1;
+ }
+
+ message Unset {
+ // (Required) The config keys to unset.
+ repeated string keys = 1;
+ }
+
+ message IsModifiable {
+ // (Required) The config keys to check the config is modifiable.
+ repeated string keys = 1;
+ }
+}
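A hedged sketch of the two most common config operations, setting and getting Spark confs through this message; the conf key shown is only an example:

```java
import org.apache.kyuubi.shaded.spark.connect.proto.ConfigRequest;
import org.apache.kyuubi.shaded.spark.connect.proto.KeyValue;
import org.apache.kyuubi.shaded.spark.connect.proto.UserContext;

final class ConfigRequests {
  static ConfigRequest set(String sessionId, UserContext userContext, String key, String value) {
    return ConfigRequest.newBuilder()
        .setSessionId(sessionId)
        .setUserContext(userContext)
        .setOperation(ConfigRequest.Operation.newBuilder()
            .setSet(ConfigRequest.Set.newBuilder()
                .addPairs(KeyValue.newBuilder().setKey(key).setValue(value))))
        .build();
  }

  static ConfigRequest get(String sessionId, UserContext userContext, String key) {
    return ConfigRequest.newBuilder()
        .setSessionId(sessionId)
        .setUserContext(userContext)
        .setOperation(ConfigRequest.Operation.newBuilder()
            .setGet(ConfigRequest.Get.newBuilder().addKeys(key)))
        .build();
  }
}
```

The matching ConfigResponse carries the results back in the same KeyValue shape.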
+
+// Response to the config request.
+// Next ID: 5
+message ConfigResponse {
+ string session_id = 1;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 4;
+
+ // (Optional) The result key-value pairs.
+ //
+ // Available when the operation is 'Get', 'GetWithDefault', 'GetOption',
'GetAll'.
+ // Also available for the operation 'IsModifiable' with boolean string
"true" and "false".
+ repeated KeyValue pairs = 2;
+
+ // (Optional)
+ //
+ // Warning messages for deprecated or unsupported configurations.
+ repeated string warnings = 3;
+}
+
+// Request to transfer client-local artifacts.
+message AddArtifactsRequest {
+
+ // (Required)
+ //
+  // The session_id specifies a spark session for a user id (which is specified
+  // by user_context.user_id). The session_id is set by the client to be able to
+  // collate streaming responses from different queries within the dedicated session.
+  // The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string session_id = 1;
+
+ // User context
+ UserContext user_context = 2;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 7;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 6;
+
+ // A chunk of an Artifact.
+ message ArtifactChunk {
+ // Data chunk.
+ bytes data = 1;
+ // CRC to allow server to verify integrity of the chunk.
+ int64 crc = 2;
+ }
+
+ // An artifact that is contained in a single `ArtifactChunk`.
+ // Generally, this message represents tiny artifacts such as REPL-generated
class files.
+ message SingleChunkArtifact {
+ // The name of the artifact is expected in the form of a "Relative Path"
that is made up of a
+ // sequence of directories and the final file element.
+ // Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class",
"abc.xyz", "a/b/X.jar".
+ // The server is expected to maintain the hierarchy of files as defined by
their name. (i.e
+ // The relative path of the file on the server's filesystem will be the
same as the name of
+ // the provided artifact)
+ string name = 1;
+ // A single data chunk.
+ ArtifactChunk data = 2;
+ }
+
+ // A number of `SingleChunkArtifact` batched into a single RPC.
+ message Batch {
+ repeated SingleChunkArtifact artifacts = 1;
+ }
+
+ // Signals the beginning/start of a chunked artifact.
+ // A large artifact is transferred through a payload of
`BeginChunkedArtifact` followed by a
+ // sequence of `ArtifactChunk`s.
+ message BeginChunkedArtifact {
+ // Name of the artifact undergoing chunking. Follows the same conventions
as the `name` in
+ // the `Artifact` message.
+ string name = 1;
+ // Total size of the artifact in bytes.
+ int64 total_bytes = 2;
+ // Number of chunks the artifact is split into.
+ // This includes the `initial_chunk`.
+ int64 num_chunks = 3;
+ // The first/initial chunk.
+ ArtifactChunk initial_chunk = 4;
+ }
+
+ // The payload is either a batch of artifacts or a partial chunk of a large
artifact.
+ oneof payload {
+ Batch batch = 3;
+ // The metadata and the initial chunk of a large artifact chunked into
multiple requests.
+ // The server side is notified about the total size of the large artifact
as well as the
+ // number of chunks to expect.
+ BeginChunkedArtifact begin_chunk = 4;
+ // A chunk of an artifact excluding metadata. This can be any chunk of a
large artifact
+ // excluding the first chunk (which is included in `BeginChunkedArtifact`).
+ ArtifactChunk chunk = 5;
+ }
+}
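To make the chunking contract above concrete, a hedged sketch that turns a byte array into the request sequence for one large artifact: a `BeginChunkedArtifact` carrying the first chunk, followed by plain `ArtifactChunk` messages. The chunk size and the use of CRC32 are assumptions of this sketch; sending the requests over the client-streaming AddArtifacts RPC is left to the caller:

```java
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.zip.CRC32;
import org.apache.kyuubi.shaded.spark.connect.proto.AddArtifactsRequest;

final class ArtifactChunks {
  private static final int CHUNK_SIZE = 32 * 1024; // arbitrary example size

  static List<AddArtifactsRequest> chunked(String sessionId, String name, byte[] bytes) {
    List<AddArtifactsRequest> requests = new ArrayList<>();
    for (int offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
      byte[] part = Arrays.copyOfRange(bytes, offset, Math.min(offset + CHUNK_SIZE, bytes.length));
      CRC32 crc = new CRC32();
      crc.update(part);
      AddArtifactsRequest.ArtifactChunk chunk = AddArtifactsRequest.ArtifactChunk.newBuilder()
          .setData(ByteString.copyFrom(part))
          .setCrc(crc.getValue())
          .build();
      AddArtifactsRequest.Builder request = AddArtifactsRequest.newBuilder().setSessionId(sessionId);
      if (offset == 0) {
        // First message announces the artifact: name, total size, chunk count, initial chunk.
        int numChunks = (bytes.length + CHUNK_SIZE - 1) / CHUNK_SIZE;
        request.setBeginChunk(AddArtifactsRequest.BeginChunkedArtifact.newBuilder()
            .setName(name)                  // e.g. "jars/test.jar"
            .setTotalBytes(bytes.length)
            .setNumChunks(numChunks)
            .setInitialChunk(chunk));
      } else {
        request.setChunk(chunk);            // subsequent chunks carry only data + CRC
      }
      requests.add(request.build());
    }
    return requests;
  }
}
```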
+
+// Response to adding an artifact. Contains relevant metadata to verify
successful transfer of
+// artifact(s).
+// Next ID: 4
+message AddArtifactsResponse {
+ // Session id in which the AddArtifact was running.
+ string session_id = 2;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 3;
+
+ // The list of artifact(s) seen by the server.
+ repeated ArtifactSummary artifacts = 1;
+
+ // Metadata of an artifact.
+ message ArtifactSummary {
+ string name = 1;
+ // Whether the CRC (Cyclic Redundancy Check) is successful on server
verification.
+ // The server discards any artifact that fails the CRC.
+ // If false, the client may choose to resend the artifact specified by
`name`.
+ bool is_crc_successful = 2;
+ }
+}
+
+// Request to get current statuses of artifacts at the server side.
+message ArtifactStatusesRequest {
+ // (Required)
+ //
+  // The session_id specifies a spark session for a user id (which is specified
+  // by user_context.user_id). The session_id is set by the client to be able to
+  // collate streaming responses from different queries within the dedicated session.
+  // The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 5;
+
+ // User context
+ UserContext user_context = 2;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 3;
+
+ // The name of the artifact is expected in the form of a "Relative Path"
that is made up of a
+ // sequence of directories and the final file element.
+ // Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class",
"abc.xyz", "a/b/X.jar".
+ // The server is expected to maintain the hierarchy of files as defined by
their name. (i.e
+ // The relative path of the file on the server's filesystem will be the same
as the name of
+ // the provided artifact)
+ repeated string names = 4;
+}
+
+// Response to checking artifact statuses.
+// Next ID: 4
+message ArtifactStatusesResponse {
+ // Session id in which the ArtifactStatus was running.
+ string session_id = 2;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 3;
+ // A map of artifact names to their statuses.
+ map<string, ArtifactStatus> statuses = 1;
+
+ message ArtifactStatus {
+    // Whether the particular artifact exists at the server.
+ bool exists = 1;
+ }
+}
+
+message InterruptRequest {
+ // (Required)
+ //
+  // The session_id specifies a spark session for a user id (which is specified
+  // by user_context.user_id). The session_id is set by the client to be able to
+  // collate streaming responses from different queries within the dedicated session.
+  // The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`
+  string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 7;
+
+ // (Required) User context
+ UserContext user_context = 2;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 3;
+
+ // (Required) The type of interrupt to execute.
+ InterruptType interrupt_type = 4;
+
+ enum InterruptType {
+ INTERRUPT_TYPE_UNSPECIFIED = 0;
+
+    // Interrupt all running executions within the session with the provided session_id.
+    INTERRUPT_TYPE_ALL = 1;
+
+    // Interrupt all running executions within the session with the provided operation_tag.
+    INTERRUPT_TYPE_TAG = 2;
+
+    // Interrupt the running execution within the session with the provided operation_id.
+    INTERRUPT_TYPE_OPERATION_ID = 3;
+ }
+
+ oneof interrupt {
+    // if interrupt_tag == INTERRUPT_TYPE_TAG, interrupt operation with this tag.
+    string operation_tag = 5;
+
+    // if interrupt_tag == INTERRUPT_TYPE_OPERATION_ID, interrupt operation with this operation_id.
+    string operation_id = 6;
+ }
+}
+
+// Next ID: 4
+message InterruptResponse {
+ // Session id in which the interrupt was running.
+ string session_id = 1;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 3;
+
+ // Operation ids of the executions which were interrupted.
+ repeated string interrupted_ids = 2;
+
+}
+
+message ReattachOptions {
+  // If true, the request can be reattached to using ReattachExecute.
+  // ReattachExecute can be used either if the stream broke with a GRPC network error,
+  // or if the server closed the stream without sending a response with StreamStatus.complete=true.
+  // The server will keep a buffer of responses in case a response is lost, and
+  // ReattachExecute needs to back-track.
+  //
+  // If false, the execution response stream will not be reattachable, and all responses are
+  // immediately released by the server after being sent.
+  bool reattachable = 1;
+}
+
+message ReattachExecuteRequest {
+ // (Required)
+ //
+ // The session_id of the request to reattach to.
+ // This must be an id of existing session.
+ string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 6;
+
+ // (Required) User context
+ //
+  // user_context.user_id and session_id both identify a unique remote spark session on the
+  // server side.
+ UserContext user_context = 2;
+
+ // (Required)
+ // Provide an id of the request to reattach to.
+ // This must be an id of existing operation.
+ string operation_id = 3;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 4;
+
+ // (Optional)
+ // Last already processed response id from the response stream.
+  // After reattach, server will resume the response stream after that response.
+  // If not specified, server will restart the stream from the start.
+  //
+  // Note: server controls the amount of responses that it buffers and it may drop responses
+  // that are far behind the latest returned response, so this can't be used to arbitrarily
+  // scroll back the cursor. If the response is no longer available, this will result in an error.
+  optional string last_response_id = 5;
+}
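A hedged sketch of how these two messages fit together: the original ExecutePlanRequest opts in via ReattachOptions, and after a broken stream the client resumes with ReattachExecuteRequest, passing the last response id it fully processed. Error handling and the surrounding retry loop are omitted:

```java
import org.apache.kyuubi.shaded.spark.connect.proto.ExecutePlanRequest;
import org.apache.kyuubi.shaded.spark.connect.proto.ReattachExecuteRequest;
import org.apache.kyuubi.shaded.spark.connect.proto.ReattachOptions;
import org.apache.kyuubi.shaded.spark.connect.proto.UserContext;

final class Reattach {
  // Mark an execute request as reattachable before it is first sent.
  static ExecutePlanRequest withReattach(ExecutePlanRequest request) {
    return request.toBuilder()
        .addRequestOptions(ExecutePlanRequest.RequestOption.newBuilder()
            .setReattachOptions(ReattachOptions.newBuilder().setReattachable(true)))
        .build();
  }

  // Resume the response stream after a network error, continuing past lastResponseId.
  static ReattachExecuteRequest resume(String sessionId, UserContext userContext,
                                       String operationId, String lastResponseId) {
    return ReattachExecuteRequest.newBuilder()
        .setSessionId(sessionId)
        .setUserContext(userContext)
        .setOperationId(operationId)
        .setLastResponseId(lastResponseId)
        .build();
  }
}
```

Once a ResultComplete is seen, a ReleaseExecuteRequest with ReleaseAll frees the server-side response buffers.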
+
+message ReleaseExecuteRequest {
+ // (Required)
+ //
+ // The session_id of the request to reattach to.
+ // This must be an id of existing session.
+ string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 7;
+
+ // (Required) User context
+ //
+  // user_context.user_id and session_id both identify a unique remote spark session on the
+  // server side.
+ UserContext user_context = 2;
+
+ // (Required)
+ // Provide an id of the request to reattach to.
+ // This must be an id of existing operation.
+ string operation_id = 3;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 4;
+
+  // Release and close operation completely.
+  // This will also interrupt the query if it is running execution, and wait for it to be torn down.
+  message ReleaseAll {}
+
+  // Release all responses from the operation response stream up to and including
+  // the response given by response_id.
+  // While server determines by itself how much of a buffer of responses to keep, client providing
+  // explicit release calls will help reduce resource consumption.
+  // Noop if response_id not found in cached responses.
+  message ReleaseUntil {
+    string response_id = 1;
+  }
+
+ oneof release {
+ ReleaseAll release_all = 5;
+ ReleaseUntil release_until = 6;
+ }
+}
+
+// Next ID: 4
+message ReleaseExecuteResponse {
+ // Session id in which the release was running.
+ string session_id = 1;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 3;
+
+ // Operation id of the operation on which the release executed.
+ // If the operation couldn't be found (because e.g. it was concurrently
released), will be unset.
+ // Otherwise, it will be equal to the operation_id from request.
+ optional string operation_id = 2;
+}
+
+message ReleaseSessionRequest {
+ // (Required)
+ //
+ // The session_id of the request to reattach to.
+ // This must be an id of existing session.
+ string session_id = 1;
+
+ // (Required) User context
+ //
+  // user_context.user_id and session_id both identify a unique remote spark session on the
+  // server side.
+ UserContext user_context = 2;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 3;
+}
+
+// Next ID: 3
+message ReleaseSessionResponse {
+ // Session id of the session on which the release executed.
+ string session_id = 1;
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 2;
+}
+
+message FetchErrorDetailsRequest {
+
+ // (Required)
+ // The session_id specifies a Spark session for a user identified by
user_context.user_id.
+ // The id should be a UUID string of the format
`00112233-4455-6677-8899-aabbccddeeff`.
+ string session_id = 1;
+
+ // (Optional)
+ //
+ // Server-side generated idempotency key from the previous responses (if
any). Server
+ // can use this to validate that the server side session has not changed.
+ optional string client_observed_server_side_session_id = 5;
+
+ // User context
+ UserContext user_context = 2;
+
+ // (Required)
+ // The id of the error.
+ string error_id = 3;
+
+ // Provides optional information about the client sending the request. This
field
+ // can be used for language or version specific information and is only
intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 4;
+}
+
+// Next ID: 5
+message FetchErrorDetailsResponse {
+
+ // Server-side generated idempotency key that the client can use to assert
that the server side
+ // session has not changed.
+ string server_side_session_id = 3;
+
+ string session_id = 4;
+
+ // The index of the root error in errors. The field will not be set if the
error is not found.
+ optional int32 root_error_idx = 1;
+
+ // A list of errors.
+ repeated Error errors = 2;
+
+ message StackTraceElement {
+ // The fully qualified name of the class containing the execution point.
+ string declaring_class = 1;
+
+ // The name of the method containing the execution point.
+ string method_name = 2;
+
+ // The name of the file containing the execution point.
+ optional string file_name = 3;
+
+ // The line number of the source line containing the execution point.
+ int32 line_number = 4;
+ }
+
+ // QueryContext defines the schema for the query context of a SparkThrowable.
+ // It helps users understand where the error occurs while executing queries.
+ message QueryContext {
+ // The type of this query context.
+ enum ContextType {
+ SQL = 0;
+ DATAFRAME = 1;
+ }
+ ContextType context_type = 10;
+
+ // The object type of the query which throws the exception.
+ // If the exception is directly from the main query, it should be an empty
string.
+ // Otherwise, it should be the exact object type in upper case. For
example, a "VIEW".
+ string object_type = 1;
+
+ // The object name of the query which throws the exception.
+ // If the exception is directly from the main query, it should be an empty
string.
+ // Otherwise, it should be the object name. For example, a view name "V1".
+ string object_name = 2;
+
+ // The starting index in the query text which throws the exception. The
index starts from 0.
+ int32 start_index = 3;
+
+ // The stopping index in the query which throws the exception. The index
starts from 0.
+ int32 stop_index = 4;
+
+ // The corresponding fragment of the query which throws the exception.
+ string fragment = 5;
+
+ // The user code (call site of the API) that caused throwing the exception.
+ string call_site = 6;
+
+ // Summary of the exception cause.
+ string summary = 7;
+ }
+
+ // SparkThrowable defines the schema for SparkThrowable exceptions.
+ message SparkThrowable {
+ // Succinct, human-readable, unique, and consistent representation of the
error category.
+ optional string error_class = 1;
+
+ // The message parameters for the error framework.
+ map<string, string> message_parameters = 2;
+
+ // The query context of a SparkThrowable.
+ repeated QueryContext query_contexts = 3;
+
+ // Portable error identifier across SQL engines
+ // If null, error class or SQLSTATE is not set.
+ optional string sql_state = 4;
+ }
+
+ // Error defines the schema for the representing exception.
+ message Error {
+ // The fully qualified names of the exception class and its parent classes.
+ repeated string error_type_hierarchy = 1;
+
+ // The detailed message of the exception.
+ string message = 2;
+
+ // The stackTrace of the exception. It will be set
+ // if the SQLConf spark.sql.connect.serverStacktrace.enabled is true.
+ repeated StackTraceElement stack_trace = 3;
+
+ // The index of the cause error in errors.
+ optional int32 cause_idx = 4;
+
+ // The structured data of a SparkThrowable exception.
+ optional SparkThrowable spark_throwable = 5;
+ }
+}
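A short sketch of how a client would follow up on a failed RPC: given the error id attached to the failure, it asks for the enriched error and walks the cause chain using root_error_idx and cause_idx. How the error id is extracted from the gRPC status is deployment specific and not shown here:

```java
import org.apache.kyuubi.shaded.spark.connect.proto.FetchErrorDetailsRequest;
import org.apache.kyuubi.shaded.spark.connect.proto.FetchErrorDetailsResponse;
import org.apache.kyuubi.shaded.spark.connect.proto.SparkConnectServiceGrpc;
import org.apache.kyuubi.shaded.spark.connect.proto.UserContext;

final class ErrorDetails {
  static void print(SparkConnectServiceGrpc.SparkConnectServiceBlockingStub stub,
                    String sessionId, UserContext userContext, String errorId) {
    FetchErrorDetailsResponse response = stub.fetchErrorDetails(
        FetchErrorDetailsRequest.newBuilder()
            .setSessionId(sessionId)
            .setUserContext(userContext)
            .setErrorId(errorId)
            .build());

    if (!response.hasRootErrorIdx()) {
      return; // the error is no longer cached on the server
    }
    int idx = response.getRootErrorIdx();
    while (true) {
      FetchErrorDetailsResponse.Error error = response.getErrors(idx);
      String type = error.getErrorTypeHierarchyCount() > 0
          ? error.getErrorTypeHierarchy(0) : "<unknown>";
      System.out.println(type + ": " + error.getMessage());
      if (!error.hasCauseIdx()) {
        break; // end of the cause chain
      }
      idx = error.getCauseIdx();
    }
  }
}
```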
+
+message CheckpointCommandResult {
+ // (Required) The logical plan checkpointed.
+ CachedRemoteRelation relation = 1;
+}
+
+// Main interface for the SparkConnect service.
+service SparkConnectService {
+
+  // Executes a request that contains the query and returns a stream of [[Response]].
+  //
+  // It is guaranteed that there is at least one ARROW batch returned even if the result set is empty.
+  rpc ExecutePlan(ExecutePlanRequest) returns (stream ExecutePlanResponse) {}
+
+  // Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query.
+  rpc AnalyzePlan(AnalyzePlanRequest) returns (AnalyzePlanResponse) {}
+
+  // Update or fetch the configurations and returns a [[ConfigResponse]] containing the result.
+  rpc Config(ConfigRequest) returns (ConfigResponse) {}
+
+  // Add artifacts to the session and returns a [[AddArtifactsResponse]] containing metadata about
+  // the added artifacts.
+  rpc AddArtifacts(stream AddArtifactsRequest) returns (AddArtifactsResponse) {}
+
+  // Check statuses of artifacts in the session and returns them in a [[ArtifactStatusesResponse]]
+  rpc ArtifactStatus(ArtifactStatusesRequest) returns (ArtifactStatusesResponse) {}
+
+  // Interrupts running executions
+  rpc Interrupt(InterruptRequest) returns (InterruptResponse) {}
+
+  // Reattach to an existing reattachable execution.
+  // The ExecutePlan must have been started with ReattachOptions.reattachable=true.
+  // If the ExecutePlanResponse stream ends without a ResultComplete message, there is more to
+  // continue. If there is a ResultComplete, the client should use ReleaseExecute with
+  rpc ReattachExecute(ReattachExecuteRequest) returns (stream ExecutePlanResponse) {}
+
+  // Release a reattachable execution, or parts thereof.
+  // The ExecutePlan must have been started with ReattachOptions.reattachable=true.
+  // Non-reattachable executions are released automatically and immediately after the ExecutePlan
+  // RPC and ReleaseExecute may not be used.
+  rpc ReleaseExecute(ReleaseExecuteRequest) returns (ReleaseExecuteResponse) {}
+
+  // Release a session.
+  // All the executions in the session will be released. Any further requests for the session with
+  // that session_id for the given user_id will fail. If the session didn't exist or was already
+  // released, this is a noop.
+  rpc ReleaseSession(ReleaseSessionRequest) returns (ReleaseSessionResponse) {}
+
+  // FetchErrorDetails retrieves the matched exception with details based on a provided error id.
+  rpc FetchErrorDetails(FetchErrorDetailsRequest) returns (FetchErrorDetailsResponse) {}
+}
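To tie the service definition back to the generated stubs, a minimal end-to-end sketch that asks the server for its Spark version over AnalyzePlan; the channel and blocking stub are created as in the earlier bootstrap sketch and the user id is a placeholder:

```java
import java.util.UUID;
import org.apache.kyuubi.shaded.spark.connect.proto.AnalyzePlanRequest;
import org.apache.kyuubi.shaded.spark.connect.proto.AnalyzePlanResponse;
import org.apache.kyuubi.shaded.spark.connect.proto.SparkConnectServiceGrpc;
import org.apache.kyuubi.shaded.spark.connect.proto.UserContext;

final class SparkVersionProbe {
  static String sparkVersion(SparkConnectServiceGrpc.SparkConnectServiceBlockingStub stub) {
    AnalyzePlanResponse response = stub.analyzePlan(
        AnalyzePlanRequest.newBuilder()
            .setSessionId(UUID.randomUUID().toString())
            .setUserContext(UserContext.newBuilder().setUserId("kyuubi"))
            .setSparkVersion(AnalyzePlanRequest.SparkVersion.getDefaultInstance())
            .build());
    return response.getSparkVersion().getVersion();
  }
}
```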
diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/catalog.proto b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/catalog.proto
new file mode 100644
index 0000000..06e6282
--- /dev/null
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/catalog.proto
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+import "spark/connect/common.proto";
+import "spark/connect/types.proto";
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+option go_package = "internal/generated";
+
+// Catalog messages are marked as unstable.
+message Catalog {
+ oneof cat_type {
+ CurrentDatabase current_database = 1;
+ SetCurrentDatabase set_current_database = 2;
+ ListDatabases list_databases = 3;
+ ListTables list_tables = 4;
+ ListFunctions list_functions = 5;
+ ListColumns list_columns = 6;
+ GetDatabase get_database = 7;
+ GetTable get_table = 8;
+ GetFunction get_function = 9;
+ DatabaseExists database_exists = 10;
+ TableExists table_exists = 11;
+ FunctionExists function_exists = 12;
+ CreateExternalTable create_external_table = 13;
+ CreateTable create_table = 14;
+ DropTempView drop_temp_view = 15;
+ DropGlobalTempView drop_global_temp_view = 16;
+ RecoverPartitions recover_partitions = 17;
+ IsCached is_cached = 18;
+ CacheTable cache_table = 19;
+ UncacheTable uncache_table = 20;
+ ClearCache clear_cache = 21;
+ RefreshTable refresh_table = 22;
+ RefreshByPath refresh_by_path = 23;
+ CurrentCatalog current_catalog = 24;
+ SetCurrentCatalog set_current_catalog = 25;
+ ListCatalogs list_catalogs = 26;
+ }
+}
+
+// See `spark.catalog.currentDatabase`
+message CurrentDatabase { }
+
+// See `spark.catalog.setCurrentDatabase`
+message SetCurrentDatabase {
+ // (Required)
+ string db_name = 1;
+}
+
+// See `spark.catalog.listDatabases`
+message ListDatabases {
+ // (Optional) The pattern that the database name needs to match
+ optional string pattern = 1;
+}
+
+// See `spark.catalog.listTables`
+message ListTables {
+ // (Optional)
+ optional string db_name = 1;
+ // (Optional) The pattern that the table name needs to match
+ optional string pattern = 2;
+}
+
+// See `spark.catalog.listFunctions`
+message ListFunctions {
+ // (Optional)
+ optional string db_name = 1;
+ // (Optional) The pattern that the function name needs to match
+ optional string pattern = 2;
+}
+
+// See `spark.catalog.listColumns`
+message ListColumns {
+ // (Required)
+ string table_name = 1;
+ // (Optional)
+ optional string db_name = 2;
+}
+
+// See `spark.catalog.getDatabase`
+message GetDatabase {
+ // (Required)
+ string db_name = 1;
+}
+
+// See `spark.catalog.getTable`
+message GetTable {
+ // (Required)
+ string table_name = 1;
+ // (Optional)
+ optional string db_name = 2;
+}
+
+// See `spark.catalog.getFunction`
+message GetFunction {
+ // (Required)
+ string function_name = 1;
+ // (Optional)
+ optional string db_name = 2;
+}
+
+// See `spark.catalog.databaseExists`
+message DatabaseExists {
+ // (Required)
+ string db_name = 1;
+}
+
+// See `spark.catalog.tableExists`
+message TableExists {
+ // (Required)
+ string table_name = 1;
+ // (Optional)
+ optional string db_name = 2;
+}
+
+// See `spark.catalog.functionExists`
+message FunctionExists {
+ // (Required)
+ string function_name = 1;
+ // (Optional)
+ optional string db_name = 2;
+}
+
+// See `spark.catalog.createExternalTable`
+message CreateExternalTable {
+ // (Required)
+ string table_name = 1;
+ // (Optional)
+ optional string path = 2;
+ // (Optional)
+ optional string source = 3;
+ // (Optional)
+ optional DataType schema = 4;
+ // Options could be empty for valid data source format.
+ // The map key is case insensitive.
+ map<string, string> options = 5;
+}
+
+// See `spark.catalog.createTable`
+message CreateTable {
+ // (Required)
+ string table_name = 1;
+ // (Optional)
+ optional string path = 2;
+ // (Optional)
+ optional string source = 3;
+ // (Optional)
+ optional string description = 4;
+ // (Optional)
+ optional DataType schema = 5;
+ // Options could be empty for valid data source format.
+ // The map key is case insensitive.
+ map<string, string> options = 6;
+}
+
+// See `spark.catalog.dropTempView`
+message DropTempView {
+ // (Required)
+ string view_name = 1;
+}
+
+// See `spark.catalog.dropGlobalTempView`
+message DropGlobalTempView {
+ // (Required)
+ string view_name = 1;
+}
+
+// See `spark.catalog.recoverPartitions`
+message RecoverPartitions {
+ // (Required)
+ string table_name = 1;
+}
+
+// See `spark.catalog.isCached`
+message IsCached {
+ // (Required)
+ string table_name = 1;
+}
+
+// See `spark.catalog.cacheTable`
+message CacheTable {
+ // (Required)
+ string table_name = 1;
+
+ // (Optional)
+ optional StorageLevel storage_level = 2;
+}
+
+// See `spark.catalog.uncacheTable`
+message UncacheTable {
+ // (Required)
+ string table_name = 1;
+}
+
+// See `spark.catalog.clearCache`
+message ClearCache { }
+
+// See `spark.catalog.refreshTable`
+message RefreshTable {
+ // (Required)
+ string table_name = 1;
+}
+
+// See `spark.catalog.refreshByPath`
+message RefreshByPath {
+ // (Required)
+ string path = 1;
+}
+
+// See `spark.catalog.currentCatalog`
+message CurrentCatalog { }
+
+// See `spark.catalog.setCurrentCatalog`
+message SetCurrentCatalog {
+ // (Required)
+ string catalog_name = 1;
+}
+
+// See `spark.catalog.listCatalogs`
+message ListCatalogs {
+ // (Optional) The pattern that the catalog name needs to match
+ optional string pattern = 1;
+}
diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
new file mode 100644
index 0000000..f84e33e
--- /dev/null
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/commands.proto
@@ -0,0 +1,533 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+import "google/protobuf/any.proto";
+import "spark/connect/common.proto";
+import "spark/connect/expressions.proto";
+import "spark/connect/relations.proto";
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+option go_package = "internal/generated";
+
+// A [[Command]] is an operation that is executed by the server that does not directly consume or
+// produce a relational result.
+message Command {
+ oneof command_type {
+ CommonInlineUserDefinedFunction register_function = 1;
+ WriteOperation write_operation = 2;
+ CreateDataFrameViewCommand create_dataframe_view = 3;
+ WriteOperationV2 write_operation_v2 = 4;
+ SqlCommand sql_command = 5;
+ WriteStreamOperationStart write_stream_operation_start = 6;
+ StreamingQueryCommand streaming_query_command = 7;
+ GetResourcesCommand get_resources_command = 8;
+ StreamingQueryManagerCommand streaming_query_manager_command = 9;
+ CommonInlineUserDefinedTableFunction register_table_function = 10;
+ StreamingQueryListenerBusCommand streaming_query_listener_bus_command = 11;
+ CommonInlineUserDefinedDataSource register_data_source = 12;
+ CreateResourceProfileCommand create_resource_profile_command = 13;
+ CheckpointCommand checkpoint_command = 14;
+ RemoveCachedRemoteRelationCommand remove_cached_remote_relation_command =
15;
+ MergeIntoTableCommand merge_into_table_command = 16;
+
+ // This field is used to mark extensions to the protocol. When plugins
generate arbitrary
+ // Commands they can add them here. During the planning the correct
resolution is done.
+ google.protobuf.Any extension = 999;
+
+ }
+}
+
+// A SQL Command is used to trigger the eager evaluation of SQL commands in Spark.
+//
+// When the SQL provided as part of the message is a command it will be immediately evaluated
+// and the result will be collected and returned as part of a LocalRelation. If the result is
+// not a command, the operation will simply return a SQL Relation. This allows the client to be
+// almost oblivious to the server-side behavior.
+message SqlCommand {
+ // (Required) SQL Query.
+ string sql = 1 [deprecated=true];
+
+ // (Optional) A map of parameter names to literal expressions.
+ map<string, Expression.Literal> args = 2 [deprecated=true];
+
+ // (Optional) A sequence of literal expressions for positional parameters in
the SQL query text.
+ repeated Expression.Literal pos_args = 3 [deprecated=true];
+
+ // (Optional) A map of parameter names to expressions.
+ // It cannot coexist with `pos_arguments`.
+ map<string, Expression> named_arguments = 4 [deprecated=true];
+
+ // (Optional) A sequence of expressions for positional parameters in the SQL
query text.
+ // It cannot coexist with `named_arguments`.
+ repeated Expression pos_arguments = 5 [deprecated=true];
+
+ // (Optional) The relation that this SQL command will be built on.
+ Relation input = 6;
+}
+
+// A command that can create DataFrame global temp view or local temp view.
+message CreateDataFrameViewCommand {
+ // (Required) The relation that this view will be built on.
+ Relation input = 1;
+
+ // (Required) View name.
+ string name = 2;
+
+ // (Required) Whether this is global temp view or local temp view.
+ bool is_global = 3;
+
+ // (Required)
+ //
+ // If true, and if the view already exists, updates it; if false, and if the
view
+ // already exists, throws exception.
+ bool replace = 4;
+}
+
+// As writes are not directly handled during analysis and planning, they are
modeled as commands.
+message WriteOperation {
+ // (Required) The output of the `input` relation will be persisted according
to the options.
+ Relation input = 1;
+
+ // (Optional) Format value according to the Spark documentation. Examples
are: text, parquet, delta.
+ optional string source = 2;
+
+ // (Optional)
+ //
+ // The destination of the write operation can be either a path or a table.
+ // If the destination is neither a path nor a table, such as jdbc and noop,
+ // the `save_type` should not be set.
+ oneof save_type {
+ string path = 3;
+ SaveTable table = 4;
+ }
+
+ // (Required) the save mode.
+ SaveMode mode = 5;
+
+ // (Optional) List of columns to sort the output by.
+ repeated string sort_column_names = 6;
+
+ // (Optional) List of columns for partitioning.
+ repeated string partitioning_columns = 7;
+
+ // (Optional) Bucketing specification. Bucketing must set the number of
buckets and the columns
+ // to bucket by.
+ BucketBy bucket_by = 8;
+
+ // (Optional) A list of configuration options.
+ map<string, string> options = 9;
+
+ // (Optional) Columns used for clustering the table.
+ repeated string clustering_columns = 10;
+
+ message SaveTable {
+ // (Required) The table name.
+ string table_name = 1;
+ // (Required) The method to be called to write to the table.
+ TableSaveMethod save_method = 2;
+
+ enum TableSaveMethod {
+ TABLE_SAVE_METHOD_UNSPECIFIED = 0;
+ TABLE_SAVE_METHOD_SAVE_AS_TABLE = 1;
+ TABLE_SAVE_METHOD_INSERT_INTO = 2;
+ }
+ }
+
+ message BucketBy {
+ repeated string bucket_column_names = 1;
+ int32 num_buckets = 2;
+ }
+
+ enum SaveMode {
+ SAVE_MODE_UNSPECIFIED = 0;
+ SAVE_MODE_APPEND = 1;
+ SAVE_MODE_OVERWRITE = 2;
+ SAVE_MODE_ERROR_IF_EXISTS = 3;
+ SAVE_MODE_IGNORE = 4;
+ }
+}
+
+// As writes are not directly handled during analysis and planning, they are modeled as commands.
+message WriteOperationV2 {
+ // (Required) The output of the `input` relation will be persisted according
to the options.
+ Relation input = 1;
+
+ // (Required) The destination of the write operation must be either a path
or a table.
+ string table_name = 2;
+
+ // (Optional) A provider for the underlying output data source. Spark's
default catalog supports
+ // "parquet", "json", etc.
+ optional string provider = 3;
+
+ // (Optional) List of columns for partitioning for output table created by
`create`,
+ // `createOrReplace`, or `replace`
+ repeated Expression partitioning_columns = 4;
+
+ // (Optional) A list of configuration options.
+ map<string, string> options = 5;
+
+ // (Optional) A list of table properties.
+ map<string, string> table_properties = 6;
+
+ // (Required) Write mode.
+ Mode mode = 7;
+
+ enum Mode {
+ MODE_UNSPECIFIED = 0;
+ MODE_CREATE = 1;
+ MODE_OVERWRITE = 2;
+ MODE_OVERWRITE_PARTITIONS = 3;
+ MODE_APPEND = 4;
+ MODE_REPLACE = 5;
+ MODE_CREATE_OR_REPLACE = 6;
+ }
+
+ // (Optional) A condition for overwrite saving mode
+ Expression overwrite_condition = 8;
+
+ // (Optional) Columns used for clustering the table.
+ repeated string clustering_columns = 9;
+}
+
+// Starts write stream operation as streaming query. Query ID and Run ID of the streaming
+// query are returned.
+message WriteStreamOperationStart {
+
+ // (Required) The output of the `input` streaming relation will be written.
+ Relation input = 1;
+
+ // The following fields directly map to API for DataStreamWriter().
+ // Consult API documentation unless explicitly documented here.
+
+ string format = 2;
+ map<string, string> options = 3;
+ repeated string partitioning_column_names = 4;
+
+ oneof trigger {
+ string processing_time_interval = 5;
+ bool available_now = 6;
+ bool once = 7;
+ string continuous_checkpoint_interval = 8;
+ }
+
+ string output_mode = 9;
+ string query_name = 10;
+
+ // The destination is optional. When set, it can be a path or a table name.
+ oneof sink_destination {
+ string path = 11;
+ string table_name = 12;
+ }
+
+ StreamingForeachFunction foreach_writer = 13;
+ StreamingForeachFunction foreach_batch = 14;
+
+ // (Optional) Columns used for clustering the table.
+ repeated string clustering_column_names = 15;
+}
+
+message StreamingForeachFunction {
+ oneof function {
+ PythonUDF python_function = 1;
+ ScalarScalaUDF scala_function = 2;
+ }
+}
+
+message WriteStreamOperationStartResult {
+
+ // (Required) Query instance. See `StreamingQueryInstanceId`.
+ StreamingQueryInstanceId query_id = 1;
+
+ // An optional query name.
+ string name = 2;
+
+ // Optional query started event if there is any listener registered on the
client side.
+ optional string query_started_event_json = 3;
+
+ // TODO: How do we indicate errors?
+ // TODO: Consider adding status, last progress etc here.
+}
+
+// A tuple that uniquely identifies an instance of streaming query run. It consists of `id` that
+// persists across the streaming runs and `run_id` that changes between each run of the
+// streaming query that resumes from the checkpoint.
+message StreamingQueryInstanceId {
+
+ // (Required) The unique id of this query that persists across restarts from checkpoint data.
+ // That is, this id is generated when a query is started for the first time, and
+ // will be the same every time it is restarted from checkpoint data.
+ string id = 1;
+
+ // (Required) The unique id of this run of the query. That is, every start/restart of a query
+ // will generate a unique run_id. Therefore, every time a query is restarted from
+ // checkpoint, it will have the same `id` but different `run_id`s.
+ string run_id = 2;
+}
+
+// Commands for a streaming query.
+message StreamingQueryCommand {
+
+ // (Required) Query instance. See `StreamingQueryInstanceId`.
+ StreamingQueryInstanceId query_id = 1;
+
+ // See documentation for the corresponding API method in StreamingQuery.
+ oneof command {
+ // status() API.
+ bool status = 2;
+ // lastProgress() API.
+ bool last_progress = 3;
+ // recentProgress() API.
+ bool recent_progress = 4;
+ // stop() API. Stops the query.
+ bool stop = 5;
+ // processAllAvailable() API. Waits till all the available data is
processed
+ bool process_all_available = 6;
+ // explain() API. Returns logical and physical plans.
+ ExplainCommand explain = 7;
+ // exception() API. Returns the exception in the query if any.
+ bool exception = 8;
+ // awaitTermination() API. Waits for the termination of the query.
+ AwaitTerminationCommand await_termination = 9;
+ }
+
+ message ExplainCommand {
+ // TODO: Consider reusing Explain from AnalyzePlanRequest message.
+ // We can not do this right now since it base.proto imports this file.
+ bool extended = 1;
+ }
+
+ message AwaitTerminationCommand {
+ optional int64 timeout_ms = 2;
+ }
+}
+
+// Response for commands on a streaming query.
+message StreamingQueryCommandResult {
+ // (Required) Query instance id. See `StreamingQueryInstanceId`.
+ StreamingQueryInstanceId query_id = 1;
+
+ oneof result_type {
+ StatusResult status = 2;
+ RecentProgressResult recent_progress = 3;
+ ExplainResult explain = 4;
+ ExceptionResult exception = 5;
+ AwaitTerminationResult await_termination = 6;
+ }
+
+ message StatusResult {
+ // See documentation for these Scala 'StreamingQueryStatus' struct
+ string status_message = 1;
+ bool is_data_available = 2;
+ bool is_trigger_active = 3;
+ bool is_active = 4;
+ }
+
+ message RecentProgressResult {
+ // Progress reports as an array of json strings.
+ repeated string recent_progress_json = 5;
+ }
+
+ message ExplainResult {
+ // Logical and physical plans as string
+ string result = 1;
+ }
+
+ message ExceptionResult {
+ // (Optional) Exception message as string, maps to the return value of
original
+ // StreamingQueryException's toString method
+ optional string exception_message = 1;
+ // (Optional) Exception error class as string
+ optional string error_class = 2;
+ // (Optional) Exception stack trace as string
+ optional string stack_trace = 3;
+ }
+
+ message AwaitTerminationResult {
+ bool terminated = 1;
+ }
+}
+
+// Commands for the streaming query manager.
+message StreamingQueryManagerCommand {
+
+ // See documentation for the corresponding API method in
StreamingQueryManager.
+ oneof command {
+ // active() API, returns a list of active queries.
+ bool active = 1;
+ // get() API, returns the StreamingQuery identified by id.
+ string get_query = 2;
+ // awaitAnyTermination() API, wait until any query terminates or timeout.
+ AwaitAnyTerminationCommand await_any_termination = 3;
+ // resetTerminated() API.
+ bool reset_terminated = 4;
+ // addListener API.
+ StreamingQueryListenerCommand add_listener = 5;
+ // removeListener API.
+ StreamingQueryListenerCommand remove_listener = 6;
+ // listListeners() API, returns a list of streaming query listeners.
+ bool list_listeners = 7;
+ }
+
+ message AwaitAnyTerminationCommand {
+ // (Optional) The waiting time in milliseconds to wait for any query to
terminate.
+ optional int64 timeout_ms = 1;
+ }
+
+ message StreamingQueryListenerCommand {
+ bytes listener_payload = 1;
+ optional PythonUDF python_listener_payload = 2;
+ string id = 3;
+ }
+}
+
+// Response for commands on the streaming query manager.
+message StreamingQueryManagerCommandResult {
+ oneof result_type {
+ ActiveResult active = 1;
+ StreamingQueryInstance query = 2;
+ AwaitAnyTerminationResult await_any_termination = 3;
+ bool reset_terminated = 4;
+ bool add_listener = 5;
+ bool remove_listener = 6;
+ ListStreamingQueryListenerResult list_listeners = 7;
+ }
+
+ message ActiveResult {
+ repeated StreamingQueryInstance active_queries = 1;
+ }
+
+ message StreamingQueryInstance {
+ // (Required) The id and runId of this query.
+ StreamingQueryInstanceId id = 1;
+ // (Optional) The name of this query.
+ optional string name = 2;
+ }
+
+ message AwaitAnyTerminationResult {
+ bool terminated = 1;
+ }
+
+ message StreamingQueryListenerInstance {
+ bytes listener_payload = 1;
+ }
+
+ message ListStreamingQueryListenerResult {
+ // (Required) Reference IDs of listener instances.
+ repeated string listener_ids = 1;
+ }
+}
+
+// The protocol for client-side StreamingQueryListener.
+// This command will only be set when either the first listener is added to the client, or the last
+// listener is removed from the client.
+// The add_listener_bus_listener command will only be set true in the first case.
+// The remove_listener_bus_listener command will only be set true in the second case.
+message StreamingQueryListenerBusCommand {
+ oneof command {
+ bool add_listener_bus_listener = 1;
+ bool remove_listener_bus_listener = 2;
+ }
+}
+
+// The enum used for client side streaming query listener event
+// There is no QueryStartedEvent defined here,
+// it is added as a field in WriteStreamOperationStartResult
+enum StreamingQueryEventType {
+ QUERY_PROGRESS_UNSPECIFIED = 0;
+ QUERY_PROGRESS_EVENT = 1;
+ QUERY_TERMINATED_EVENT = 2;
+ QUERY_IDLE_EVENT = 3;
+}
+
+// The protocol for the returned events in the long-running response channel.
+message StreamingQueryListenerEvent {
+ // (Required) The json serialized event, all StreamingQueryListener events have a json method
+ string event_json = 1;
+ // (Required) Query event type used by client to decide how to deserialize the event_json
+ StreamingQueryEventType event_type = 2;
+}
+
+message StreamingQueryListenerEventsResult {
+ repeated StreamingQueryListenerEvent events = 1;
+ optional bool listener_bus_listener_added = 2;
+}
+
+// Command to get the output of 'SparkContext.resources'
+message GetResourcesCommand { }
+
+// Response for command 'GetResourcesCommand'.
+message GetResourcesCommandResult {
+ map<string, ResourceInformation> resources = 1;
+}
+
+// Command to create ResourceProfile
+message CreateResourceProfileCommand {
+ // (Required) The ResourceProfile to be built on the server-side.
+ ResourceProfile profile = 1;
+}
+
+// Response for command 'CreateResourceProfileCommand'.
+message CreateResourceProfileCommandResult {
+ // (Required) Server-side generated resource profile id.
+ int32 profile_id = 1;
+}
+
+// Command to remove `CashedRemoteRelation`
+message RemoveCachedRemoteRelationCommand {
+ // (Required) The remote to be related
+ CachedRemoteRelation relation = 1;
+}
+
+message CheckpointCommand {
+ // (Required) The logical plan to checkpoint.
+ Relation relation = 1;
+
+ // (Required) Locally checkpoint using a local temporary
+ // directory in Spark Connect server (Spark Driver)
+ bool local = 2;
+
+ // (Required) Whether to checkpoint this dataframe immediately.
+ bool eager = 3;
+}
+
+message MergeIntoTableCommand {
+ // (Required) The name of the target table.
+ string target_table_name = 1;
+
+ // (Required) The relation of the source table.
+ Relation source_table_plan = 2;
+
+ // (Required) The condition to match the source and target.
+ Expression merge_condition = 3;
+
+ // (Optional) The actions to be taken when the condition is matched.
+ repeated Expression match_actions = 4;
+
+ // (Optional) The actions to be taken when the condition is not matched.
+ repeated Expression not_matched_actions = 5;
+
+ // (Optional) The actions to be taken when the condition is not matched by
source.
+ repeated Expression not_matched_by_source_actions = 6;
+
+ // (Required) Whether to enable schema evolution.
+ bool with_schema_evolution = 7;
+}
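
For illustration only (not part of this patch): once these protos are compiled with the relocated java_package shown in the sibling files, clients build the messages above with ordinary protobuf-java builders; only the import path changes. A minimal sketch of constructing the WriteOperation defined above, assuming the standard protobuf-java code generator (the class name and sample values below are made up for the example):

    import org.apache.kyuubi.shaded.spark.connect.proto.Relation;
    import org.apache.kyuubi.shaded.spark.connect.proto.WriteOperation;

    public class WriteOperationSketch {
      public static void main(String[] args) {
        // Placeholder input; a real client would supply a Read/Project/etc. relation here.
        Relation input = Relation.getDefaultInstance();

        WriteOperation write = WriteOperation.newBuilder()
            .setInput(input)
            .setSource("parquet")                                 // optional data source format
            .setPath("/tmp/out")                                  // oneof save_type: a path destination
            .setMode(WriteOperation.SaveMode.SAVE_MODE_OVERWRITE) // required save mode
            .addPartitioningColumns("dt")                         // repeated partitioning column
            .putOptions("compression", "snappy")                  // free-form writer options
            .build();

        System.out.println(write);
      }
    }
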
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/common.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/common.proto
new file mode 100644
index 0000000..e156aa4
--- /dev/null
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/common.proto
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+option go_package = "internal/generated";
+
+// StorageLevel for persisting Datasets/Tables.
+message StorageLevel {
+ // (Required) Whether the cache should use disk or not.
+ bool use_disk = 1;
+ // (Required) Whether the cache should use memory or not.
+ bool use_memory = 2;
+ // (Required) Whether the cache should use off-heap or not.
+ bool use_off_heap = 3;
+ // (Required) Whether the cached data is deserialized or not.
+ bool deserialized = 4;
+ // (Required) The number of replicas.
+ int32 replication = 5;
+}
+
+
+// ResourceInformation to hold information about a type of Resource.
+// The corresponding class is 'org.apache.spark.resource.ResourceInformation'
+message ResourceInformation {
+ // (Required) The name of the resource
+ string name = 1;
+ // (Required) An array of strings describing the addresses of the resource.
+ repeated string addresses = 2;
+}
+
+// An executor resource request.
+message ExecutorResourceRequest {
+ // (Required) resource name.
+ string resource_name = 1;
+
+ // (Required) resource amount requesting.
+ int64 amount = 2;
+
+ // Optional script used to discover the resources.
+ optional string discovery_script = 3;
+
+ // Optional vendor, required for some cluster managers.
+ optional string vendor = 4;
+}
+
+// A task resource request.
+message TaskResourceRequest {
+ // (Required) resource name.
+ string resource_name = 1;
+
+ // (Required) resource amount requesting as a double to support fractional
+ // resource requests.
+ double amount = 2;
+}
+
+message ResourceProfile {
+ // (Optional) Resource requests for executors. Mapped from the resource name
+ // (e.g., cores, memory, CPU) to its specific request.
+ map<string, ExecutorResourceRequest> executor_resources = 1;
+
+ // (Optional) Resource requests for tasks. Mapped from the resource name
+ // (e.g., cores, memory, CPU) to its specific request.
+ map<string, TaskResourceRequest> task_resources = 2;
+}
+
+message Origin {
+ // (Required) Indicate the origin type.
+ oneof function {
+ PythonOrigin python_origin = 1;
+ }
+}
+
+message PythonOrigin {
+ // (Required) Name of the origin, for example, the name of the function
+ string fragment = 1;
+
+ // (Required) Callsite to show to end users, for example, stacktrace.
+ string call_site = 2;
+}
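
As a rough usage sketch (not part of this patch), the value messages in common.proto map one-to-one onto builder calls under the relocated package. The snippet below shows a StorageLevel roughly matching Spark's MEMORY_AND_DISK level and a one-GPU ResourceProfile, assuming the standard protobuf-java code generator (class name and values are illustrative):

    import org.apache.kyuubi.shaded.spark.connect.proto.ExecutorResourceRequest;
    import org.apache.kyuubi.shaded.spark.connect.proto.ResourceProfile;
    import org.apache.kyuubi.shaded.spark.connect.proto.StorageLevel;

    public class CommonProtoSketch {
      public static void main(String[] args) {
        // Deserialized in memory, spill to disk, no off-heap, single replica.
        StorageLevel memoryAndDisk = StorageLevel.newBuilder()
            .setUseDisk(true)
            .setUseMemory(true)
            .setUseOffHeap(false)
            .setDeserialized(true)
            .setReplication(1)
            .build();

        // Executor resource request for one GPU, wrapped in a ResourceProfile.
        ResourceProfile profile = ResourceProfile.newBuilder()
            .putExecutorResources("gpu",
                ExecutorResourceRequest.newBuilder()
                    .setResourceName("gpu")
                    .setAmount(1)
                    .build())
            .build();

        System.out.println(memoryAndDisk);
        System.out.println(profile);
      }
    }
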
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/example_plugins.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/example_plugins.proto
new file mode 100644
index 0000000..51faef4
--- /dev/null
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/example_plugins.proto
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+import "spark/connect/relations.proto";
+import "spark/connect/expressions.proto";
+option go_package = "internal/generated";
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+
+message ExamplePluginRelation {
+ Relation input = 1;
+ string custom_field = 2;
+
+}
+
+message ExamplePluginExpression {
+ Expression child = 1;
+ string custom_field = 2;
+}
+
+message ExamplePluginCommand {
+ string custom_field = 1;
+}
\ No newline at end of file
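
The example plugin messages above travel through the google.protobuf.Any extension fields defined on Command earlier and on Expression/Relation in the files that follow. A hedged sketch of that round trip, assuming the standard protobuf-java field-name mapping and that the protobuf runtime itself stays under com.google.protobuf (if this module also relocates the protobuf runtime, the Any import moves accordingly; class name and values are illustrative):

    import com.google.protobuf.Any;
    import org.apache.kyuubi.shaded.spark.connect.proto.ExamplePluginRelation;
    import org.apache.kyuubi.shaded.spark.connect.proto.Relation;

    public class PluginExtensionSketch {
      public static void main(String[] args) throws Exception {
        ExamplePluginRelation plugin = ExamplePluginRelation.newBuilder()
            .setCustomField("demo")
            .build();

        // Plugin relations are packed into the Any `extension` field of Relation.
        Relation relation = Relation.newBuilder()
            .setExtension(Any.pack(plugin))
            .build();

        // A server-side plugin unpacks the payload again before planning.
        ExamplePluginRelation unpacked =
            relation.getExtension().unpack(ExamplePluginRelation.class);
        System.out.println(unpacked.getCustomField());
      }
    }
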
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
new file mode 100644
index 0000000..a11b0c1
--- /dev/null
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/expressions.proto
@@ -0,0 +1,453 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+import "google/protobuf/any.proto";
+import "spark/connect/types.proto";
+import "spark/connect/common.proto";
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+option go_package = "internal/generated";
+
+// Expression used to refer to fields, functions and similar. This can be used everywhere
+// expressions in SQL appear.
+message Expression {
+
+ ExpressionCommon common = 18;
+ oneof expr_type {
+ Literal literal = 1;
+ UnresolvedAttribute unresolved_attribute = 2;
+ UnresolvedFunction unresolved_function = 3;
+ ExpressionString expression_string = 4;
+ UnresolvedStar unresolved_star = 5;
+ Alias alias = 6;
+ Cast cast = 7;
+ UnresolvedRegex unresolved_regex = 8;
+ SortOrder sort_order = 9;
+ LambdaFunction lambda_function = 10;
+ Window window = 11;
+ UnresolvedExtractValue unresolved_extract_value = 12;
+ UpdateFields update_fields = 13;
+ UnresolvedNamedLambdaVariable unresolved_named_lambda_variable = 14;
+ CommonInlineUserDefinedFunction common_inline_user_defined_function = 15;
+ CallFunction call_function = 16;
+ NamedArgumentExpression named_argument_expression = 17;
+ MergeAction merge_action = 19;
+ TypedAggregateExpression typed_aggregate_expression = 20;
+
+ // This field is used to mark extensions to the protocol. When plugins generate arbitrary
+ // relations they can add them here. During the planning the correct resolution is done.
+ google.protobuf.Any extension = 999;
+ }
+
+
+ // Expression for the OVER clause or WINDOW clause.
+ message Window {
+
+ // (Required) The window function.
+ Expression window_function = 1;
+
+ // (Optional) The way that input rows are partitioned.
+ repeated Expression partition_spec = 2;
+
+ // (Optional) Ordering of rows in a partition.
+ repeated SortOrder order_spec = 3;
+
+ // (Optional) Window frame in a partition.
+ //
+ // If not set, it will be treated as 'UnspecifiedFrame'.
+ WindowFrame frame_spec = 4;
+
+ // The window frame
+ message WindowFrame {
+
+ // (Required) The type of the frame.
+ FrameType frame_type = 1;
+
+ // (Required) The lower bound of the frame.
+ FrameBoundary lower = 2;
+
+ // (Required) The upper bound of the frame.
+ FrameBoundary upper = 3;
+
+ enum FrameType {
+ FRAME_TYPE_UNDEFINED = 0;
+
+ // RowFrame treats rows in a partition individually.
+ FRAME_TYPE_ROW = 1;
+
+ // RangeFrame treats rows in a partition as groups of peers.
+ // All rows having the same 'ORDER BY' ordering are considered as
peers.
+ FRAME_TYPE_RANGE = 2;
+ }
+
+ message FrameBoundary {
+ oneof boundary {
+ // CURRENT ROW boundary
+ bool current_row = 1;
+
+ // UNBOUNDED boundary.
+ // For lower bound, it will be converted to 'UnboundedPreceding'.
+ // for upper bound, it will be converted to 'UnboundedFollowing'.
+ bool unbounded = 2;
+
+ // This is an expression for future proofing. We are expecting
literals on the server side.
+ Expression value = 3;
+ }
+ }
+ }
+ }
+
+ // SortOrder is used to specify the data ordering, it is normally used in Sort and Window.
+ // It is an unevaluable expression and cannot be evaluated, so can not be used in Projection.
+ message SortOrder {
+ // (Required) The expression to be sorted.
+ Expression child = 1;
+
+ // (Required) The sort direction, should be ASCENDING or DESCENDING.
+ SortDirection direction = 2;
+
+ // (Required) How to deal with NULLs, should be NULLS_FIRST or NULLS_LAST.
+ NullOrdering null_ordering = 3;
+
+ enum SortDirection {
+ SORT_DIRECTION_UNSPECIFIED = 0;
+ SORT_DIRECTION_ASCENDING = 1;
+ SORT_DIRECTION_DESCENDING = 2;
+ }
+
+ enum NullOrdering {
+ SORT_NULLS_UNSPECIFIED = 0;
+ SORT_NULLS_FIRST = 1;
+ SORT_NULLS_LAST = 2;
+ }
+ }
+
+ message Cast {
+ // (Required) the expression to be casted.
+ Expression expr = 1;
+
+ // (Required) the data type that the expr to be casted to.
+ oneof cast_to_type {
+ DataType type = 2;
+ // If this is set, Server will use Catalyst parser to parse this string
to DataType.
+ string type_str = 3;
+ }
+
+ // (Optional) The expression evaluation mode.
+ EvalMode eval_mode = 4;
+
+ enum EvalMode {
+ EVAL_MODE_UNSPECIFIED = 0;
+ EVAL_MODE_LEGACY = 1;
+ EVAL_MODE_ANSI = 2;
+ EVAL_MODE_TRY = 3;
+ }
+ }
+
+ message Literal {
+ oneof literal_type {
+ DataType null = 1;
+ bytes binary = 2;
+ bool boolean = 3;
+
+ int32 byte = 4;
+ int32 short = 5;
+ int32 integer = 6;
+ int64 long = 7;
+ float float = 10;
+ double double = 11;
+ Decimal decimal = 12;
+
+ string string = 13;
+
+ // Date in units of days since the UNIX epoch.
+ int32 date = 16;
+ // Timestamp in units of microseconds since the UNIX epoch.
+ int64 timestamp = 17;
+ // Timestamp in units of microseconds since the UNIX epoch (without
timezone information).
+ int64 timestamp_ntz = 18;
+
+ CalendarInterval calendar_interval = 19;
+ int32 year_month_interval = 20;
+ int64 day_time_interval = 21;
+ Array array = 22;
+ Map map = 23;
+ Struct struct = 24;
+ }
+
+ message Decimal {
+ // the string representation.
+ string value = 1;
+ // The maximum number of digits allowed in the value.
+ // the maximum precision is 38.
+ optional int32 precision = 2;
+ // declared scale of decimal literal
+ optional int32 scale = 3;
+ }
+
+ message CalendarInterval {
+ int32 months = 1;
+ int32 days = 2;
+ int64 microseconds = 3;
+ }
+
+ message Array {
+ DataType element_type = 1;
+ repeated Literal elements = 2;
+ }
+
+ message Map {
+ DataType key_type = 1;
+ DataType value_type = 2;
+ repeated Literal keys = 3;
+ repeated Literal values = 4;
+ }
+
+ message Struct {
+ DataType struct_type = 1;
+ repeated Literal elements = 2;
+ }
+ }
+
+ // An unresolved attribute that is not explicitly bound to a specific column, but the column
+ // is resolved during analysis by name.
+ message UnresolvedAttribute {
+ // (Required) An identifier that will be parsed by Catalyst parser. This should follow the
+ // Spark SQL identifier syntax.
+ string unparsed_identifier = 1;
+
+ // (Optional) The id of corresponding connect plan.
+ optional int64 plan_id = 2;
+
+ // (Optional) The requested column is a metadata column.
+ optional bool is_metadata_column = 3;
+ }
+
+ // An unresolved function is not explicitly bound to one explicit function, but the function
+ // is resolved during analysis following Sparks name resolution rules.
+ message UnresolvedFunction {
+ // (Required) name (or unparsed name for user defined function) for the unresolved function.
+ string function_name = 1;
+
+ // (Optional) Function arguments. Empty arguments are allowed.
+ repeated Expression arguments = 2;
+
+ // (Required) Indicate if this function should be applied on distinct
values.
+ bool is_distinct = 3;
+
+ // (Required) Indicate if this is a user defined function.
+ //
+ // When it is not a user defined function, Connect will use the function name directly.
+ // When it is a user defined function, Connect will parse the function name first.
+ bool is_user_defined_function = 4;
+ }
+
+ // Expression as string.
+ message ExpressionString {
+ // (Required) A SQL expression that will be parsed by Catalyst parser.
+ string expression = 1;
+ }
+
+ // UnresolvedStar is used to expand all the fields of a relation or struct.
+ message UnresolvedStar {
+
+ // (Optional) The target of the expansion.
+ //
+ // If set, it should end with '.*' and will be parsed by
'parseAttributeName'
+ // in the server side.
+ optional string unparsed_target = 1;
+
+ // (Optional) The id of corresponding connect plan.
+ optional int64 plan_id = 2;
+ }
+
+ // Represents all of the input attributes to a given relational operator,
for example in
+ // "SELECT `(id)?+.+` FROM ...".
+ message UnresolvedRegex {
+ // (Required) The column name used to extract column with regex.
+ string col_name = 1;
+
+ // (Optional) The id of corresponding connect plan.
+ optional int64 plan_id = 2;
+ }
+
+ // Extracts a value or values from an Expression
+ message UnresolvedExtractValue {
+ // (Required) The expression to extract value from, can be
+ // Map, Array, Struct or array of Structs.
+ Expression child = 1;
+
+ // (Required) The expression to describe the extraction, can be
+ // key of Map, index of Array, field name of Struct.
+ Expression extraction = 2;
+ }
+
+ // Add, replace or drop a field of `StructType` expression by name.
+ message UpdateFields {
+ // (Required) The struct expression.
+ Expression struct_expression = 1;
+
+ // (Required) The field name.
+ string field_name = 2;
+
+ // (Optional) The expression to add or replace.
+ //
+ // When not set, it means this field will be dropped.
+ Expression value_expression = 3;
+ }
+
+ message Alias {
+ // (Required) The expression that alias will be added on.
+ Expression expr = 1;
+
+ // (Required) a list of name parts for the alias.
+ //
+ // Scalar columns only has one name that presents.
+ repeated string name = 2;
+
+ // (Optional) Alias metadata expressed as a JSON map.
+ optional string metadata = 3;
+ }
+
+ message LambdaFunction {
+ // (Required) The lambda function.
+ //
+ // The function body should use 'UnresolvedAttribute' as arguments, the
sever side will
+ // replace 'UnresolvedAttribute' with 'UnresolvedNamedLambdaVariable'.
+ Expression function = 1;
+
+ // (Required) Function variables. Must contains 1 ~ 3 variables.
+ repeated Expression.UnresolvedNamedLambdaVariable arguments = 2;
+ }
+
+ message UnresolvedNamedLambdaVariable {
+
+ // (Required) a list of name parts for the variable. Must not be empty.
+ repeated string name_parts = 1;
+ }
+}
+
+message ExpressionCommon {
+ // (Required) Keep the information of the origin for this expression such as
stacktrace.
+ Origin origin = 1;
+}
+
+message CommonInlineUserDefinedFunction {
+ // (Required) Name of the user-defined function.
+ string function_name = 1;
+ // (Optional) Indicate if the user-defined function is deterministic.
+ bool deterministic = 2;
+ // (Optional) Function arguments. Empty arguments are allowed.
+ repeated Expression arguments = 3;
+ // (Required) Indicate the function type of the user-defined function.
+ oneof function {
+ PythonUDF python_udf = 4;
+ ScalarScalaUDF scalar_scala_udf = 5;
+ JavaUDF java_udf = 6;
+ }
+}
+
+message PythonUDF {
+ // (Required) Output type of the Python UDF
+ DataType output_type = 1;
+ // (Required) EvalType of the Python UDF
+ int32 eval_type = 2;
+ // (Required) The encoded commands of the Python UDF
+ bytes command = 3;
+ // (Required) Python version being used in the client.
+ string python_ver = 4;
+ // (Optional) Additional includes for the Python UDF.
+ repeated string additional_includes = 5;
+}
+
+message ScalarScalaUDF {
+ // (Required) Serialized JVM object containing UDF definition, input
encoders and output encoder
+ bytes payload = 1;
+ // (Optional) Input type(s) of the UDF
+ repeated DataType inputTypes = 2;
+ // (Required) Output type of the UDF
+ DataType outputType = 3;
+ // (Required) True if the UDF can return null value
+ bool nullable = 4;
+ // (Required) Indicate if the UDF is an aggregate function
+ bool aggregate = 5;
+}
+
+message JavaUDF {
+ // (Required) Fully qualified name of Java class
+ string class_name = 1;
+
+ // (Optional) Output type of the Java UDF
+ optional DataType output_type = 2;
+
+ // (Required) Indicate if the Java user-defined function is an aggregate
function
+ bool aggregate = 3;
+}
+
+message TypedAggregateExpression {
+ // (Required) The aggregate function object packed into bytes.
+ ScalarScalaUDF scalar_scala_udf = 1;
+}
+
+message CallFunction {
+ // (Required) Unparsed name of the SQL function.
+ string function_name = 1;
+
+ // (Optional) Function arguments. Empty arguments are allowed.
+ repeated Expression arguments = 2;
+}
+
+message NamedArgumentExpression {
+ // (Required) The key of the named argument.
+ string key = 1;
+
+ // (Required) The value expression of the named argument.
+ Expression value = 2;
+}
+
+message MergeAction {
+ // (Required) The action type of the merge action.
+ ActionType action_type = 1;
+
+ // (Optional) The condition expression of the merge action.
+ optional Expression condition = 2;
+
+ // (Optional) The assignments of the merge action. Required for ActionTypes
INSERT and UPDATE.
+ repeated Assignment assignments = 3;
+
+ enum ActionType {
+ ACTION_TYPE_INVALID = 0;
+ ACTION_TYPE_DELETE = 1;
+ ACTION_TYPE_INSERT = 2;
+ ACTION_TYPE_INSERT_STAR = 3;
+ ACTION_TYPE_UPDATE = 4;
+ ACTION_TYPE_UPDATE_STAR = 5;
+ }
+
+ message Assignment {
+ // (Required) The key of the assignment.
+ Expression key = 1;
+
+ // (Required) The value of the assignment.
+ Expression value = 2;
+ }
+}
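
To make the Expression tree above concrete, here is a small sketch (not part of this patch) that builds the filter condition `age > 21` out of an UnresolvedAttribute, a Literal and an UnresolvedFunction, assuming the standard protobuf-java code generator under the relocated package (class name and column name are illustrative):

    import org.apache.kyuubi.shaded.spark.connect.proto.Expression;

    public class ExpressionSketch {
      public static void main(String[] args) {
        // Unresolved column reference "age"; the server resolves it by name during analysis.
        Expression ageCol = Expression.newBuilder()
            .setUnresolvedAttribute(
                Expression.UnresolvedAttribute.newBuilder()
                    .setUnparsedIdentifier("age"))
            .build();

        // Integer literal 21.
        Expression lit21 = Expression.newBuilder()
            .setLiteral(Expression.Literal.newBuilder().setInteger(21))
            .build();

        // Unresolved function ">" applied to (age, 21), i.e. the condition age > 21.
        Expression condition = Expression.newBuilder()
            .setUnresolvedFunction(
                Expression.UnresolvedFunction.newBuilder()
                    .setFunctionName(">")
                    .addArguments(ageCol)
                    .addArguments(lit21)
                    .setIsDistinct(false))
            .build();

        System.out.println(condition);
      }
    }
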
diff --git
a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/relations.proto
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/relations.proto
new file mode 100644
index 0000000..1535776
--- /dev/null
+++
b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/relations.proto
@@ -0,0 +1,1120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+import "google/protobuf/any.proto";
+import "spark/connect/expressions.proto";
+import "spark/connect/types.proto";
+import "spark/connect/catalog.proto";
+import "spark/connect/common.proto";
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+option go_package = "internal/generated";
+
+// The main [[Relation]] type. Fundamentally, a relation is a typed container
+// that has exactly one explicit relation type set.
+//
+// When adding new relation types, they have to be registered here.
+message Relation {
+ RelationCommon common = 1;
+ oneof rel_type {
+ Read read = 2;
+ Project project = 3;
+ Filter filter = 4;
+ Join join = 5;
+ SetOperation set_op = 6;
+ Sort sort = 7;
+ Limit limit = 8;
+ Aggregate aggregate = 9;
+ SQL sql = 10;
+ LocalRelation local_relation = 11;
+ Sample sample = 12;
+ Offset offset = 13;
+ Deduplicate deduplicate = 14;
+ Range range = 15;
+ SubqueryAlias subquery_alias = 16;
+ Repartition repartition = 17;
+ ToDF to_df = 18;
+ WithColumnsRenamed with_columns_renamed = 19;
+ ShowString show_string = 20;
+ Drop drop = 21;
+ Tail tail = 22;
+ WithColumns with_columns = 23;
+ Hint hint = 24;
+ Unpivot unpivot = 25;
+ ToSchema to_schema = 26;
+ RepartitionByExpression repartition_by_expression = 27;
+ MapPartitions map_partitions = 28;
+ CollectMetrics collect_metrics = 29;
+ Parse parse = 30;
+ GroupMap group_map = 31;
+ CoGroupMap co_group_map = 32;
+ WithWatermark with_watermark = 33;
+ ApplyInPandasWithState apply_in_pandas_with_state = 34;
+ HtmlString html_string = 35;
+ CachedLocalRelation cached_local_relation = 36;
+ CachedRemoteRelation cached_remote_relation = 37;
+ CommonInlineUserDefinedTableFunction common_inline_user_defined_table_function = 38;
+ AsOfJoin as_of_join = 39;
+ CommonInlineUserDefinedDataSource common_inline_user_defined_data_source = 40;
+ WithRelations with_relations = 41;
+
+ // NA functions
+ NAFill fill_na = 90;
+ NADrop drop_na = 91;
+ NAReplace replace = 92;
+
+ // stat functions
+ StatSummary summary = 100;
+ StatCrosstab crosstab = 101;
+ StatDescribe describe = 102;
+ StatCov cov = 103;
+ StatCorr corr = 104;
+ StatApproxQuantile approx_quantile = 105;
+ StatFreqItems freq_items = 106;
+ StatSampleBy sample_by = 107;
+
+ // Catalog API (experimental / unstable)
+ Catalog catalog = 200;
+
+ // This field is used to mark extensions to the protocol. When plugins generate arbitrary
+ // relations they can add them here. During the planning the correct resolution is done.
+ google.protobuf.Any extension = 998;
+ Unknown unknown = 999;
+ }
+}
+
+// Used for testing purposes only.
+message Unknown {}
+
+// Common metadata of all relations.
+message RelationCommon {
+ // (Required) Shared relation metadata.
+ string source_info = 1 [deprecated=true];
+
+ // (Optional) A per-client globally unique id for a given connect plan.
+ optional int64 plan_id = 2;
+
+ // (Optional) Keep the information of the origin for this expression such as
stacktrace.
+ Origin origin = 3;
+}
+
+// Relation that uses a SQL query to generate the output.
+message SQL {
+ // (Required) The SQL query.
+ string query = 1;
+
+ // (Optional) A map of parameter names to literal expressions.
+ map<string, Expression.Literal> args = 2 [deprecated=true];
+
+ // (Optional) A sequence of literal expressions for positional parameters in
the SQL query text.
+ repeated Expression.Literal pos_args = 3 [deprecated=true];
+
+ // (Optional) A map of parameter names to expressions.
+ // It cannot coexist with `pos_arguments`.
+ map<string, Expression> named_arguments = 4;
+
+ // (Optional) A sequence of expressions for positional parameters in the SQL
query text.
+ // It cannot coexist with `named_arguments`.
+ repeated Expression pos_arguments = 5;
+}
+
+// Relation of type [[WithRelations]].
+//
+// This relation contains a root plan, and one or more references that are used by the root plan.
+// There are two ways of referencing a relation, by name (through a subquery alias), or by plan_id
+// (using RelationCommon.plan_id).
+//
+// This relation can be used to implement CTEs, describe DAGs, or to reduce tree depth.
+message WithRelations {
+ // (Required) Plan at the root of the query tree. This plan is expected to contain one or more
+ // references. Those references get expanded later on by the engine.
+ Relation root = 1;
+
+ // (Required) Plans referenced by the root plan. Relations in this list are also allowed to
+ // contain references to other relations in this list, as long they do not form cycles.
+ repeated Relation references = 2;
+}
+
+// Relation that reads from a file / table or other data source. Does not have additional
+// inputs.
+message Read {
+ oneof read_type {
+ NamedTable named_table = 1;
+ DataSource data_source = 2;
+ }
+
+ // (Optional) Indicates if this is a streaming read.
+ bool is_streaming = 3;
+
+ message NamedTable {
+ // (Required) Unparsed identifier for the table.
+ string unparsed_identifier = 1;
+
+ // Options for the named table. The map key is case insensitive.
+ map<string, string> options = 2;
+ }
+
+ message DataSource {
+ // (Optional) Supported formats include: parquet, orc, text, json,
parquet, csv, avro.
+ //
+ // If not set, the value from SQL conf 'spark.sql.sources.default' will be
used.
+ optional string format = 1;
+
+ // (Optional) If not set, Spark will infer the schema.
+ //
+ // This schema string should be either DDL-formatted or JSON-formatted.
+ optional string schema = 2;
+
+ // Options for the data source. The context of this map varies based on the
+ // data source format. This options could be empty for valid data source
format.
+ // The map key is case insensitive.
+ map<string, string> options = 3;
+
+ // (Optional) A list of path for file-system backed data sources.
+ repeated string paths = 4;
+
+ // (Optional) Condition in the where clause for each partition.
+ //
+ // This is only supported by the JDBC data source.
+ repeated string predicates = 5;
+ }
+}
+
+// Projection of a bag of expressions for a given input relation.
+//
+// The input relation must be specified.
+// The projected expression can be an arbitrary expression.
+message Project {
+ // (Optional) Input relation is optional for Project.
+ //
+ // For example, `SELECT ABS(-1)` is valid plan without an input plan.
+ Relation input = 1;
+
+ // (Required) A Project requires at least one expression.
+ repeated Expression expressions = 3;
+}
+
+// Relation that applies a boolean expression `condition` on each row of
`input` to produce
+// the output result.
+message Filter {
+ // (Required) Input relation for a Filter.
+ Relation input = 1;
+
+ // (Required) A Filter must have a condition expression.
+ Expression condition = 2;
+}
+
+// Relation of type [[Join]].
+//
+// `left` and `right` must be present.
+message Join {
+ // (Required) Left input relation for a Join.
+ Relation left = 1;
+
+ // (Required) Right input relation for a Join.
+ Relation right = 2;
+
+ // (Optional) The join condition. Could be unset when `using_columns` is
utilized.
+ //
+ // This field does not co-exist with using_columns.
+ Expression join_condition = 3;
+
+ // (Required) The join type.
+ JoinType join_type = 4;
+
+ // Optional. using_columns provides a list of columns that should present on both sides of
+ // the join inputs that this Join will join on. For example A JOIN B USING col_name is
+ // equivalent to A JOIN B on A.col_name = B.col_name.
+ //
+ // This field does not co-exist with join_condition.
+ repeated string using_columns = 5;
+
+ enum JoinType {
+ JOIN_TYPE_UNSPECIFIED = 0;
+ JOIN_TYPE_INNER = 1;
+ JOIN_TYPE_FULL_OUTER = 2;
+ JOIN_TYPE_LEFT_OUTER = 3;
+ JOIN_TYPE_RIGHT_OUTER = 4;
+ JOIN_TYPE_LEFT_ANTI = 5;
+ JOIN_TYPE_LEFT_SEMI = 6;
+ JOIN_TYPE_CROSS = 7;
+ }
+
+ // (Optional) Only used by joinWith. Set the left and right join data types.
+ optional JoinDataType join_data_type = 6;
+
+ message JoinDataType {
+ // If the left data type is a struct.
+ bool is_left_struct = 1;
+ // If the right data type is a struct.
+ bool is_right_struct = 2;
+ }
+}
+
+// Relation of type [[SetOperation]]
+message SetOperation {
+ // (Required) Left input relation for a Set operation.
+ Relation left_input = 1;
+
+ // (Required) Right input relation for a Set operation.
+ Relation right_input = 2;
+
+ // (Required) The Set operation type.
+ SetOpType set_op_type = 3;
+
+ // (Optional) If to remove duplicate rows.
+ //
+ // True to preserve all results.
+ // False to remove duplicate rows.
+ optional bool is_all = 4;
+
+ // (Optional) If to perform the Set operation based on name resolution.
+ //
+ // Only UNION supports this option.
+ optional bool by_name = 5;
+
+ // (Optional) If to perform the Set operation and allow missing columns.
+ //
+ // Only UNION supports this option.
+ optional bool allow_missing_columns = 6;
+
+ enum SetOpType {
+ SET_OP_TYPE_UNSPECIFIED = 0;
+ SET_OP_TYPE_INTERSECT = 1;
+ SET_OP_TYPE_UNION = 2;
+ SET_OP_TYPE_EXCEPT = 3;
+ }
+}
+
+// Relation of type [[Limit]] that is used to `limit` rows from the input
relation.
+message Limit {
+ // (Required) Input relation for a Limit.
+ Relation input = 1;
+
+ // (Required) the limit.
+ int32 limit = 2;
+}
+
+// Relation of type [[Offset]] that is used to read rows staring from the
`offset` on
+// the input relation.
+message Offset {
+ // (Required) Input relation for an Offset.
+ Relation input = 1;
+
+ // (Required) the limit.
+ int32 offset = 2;
+}
+
+// Relation of type [[Tail]] that is used to fetch `limit` rows from the last
of the input relation.
+message Tail {
+ // (Required) Input relation for an Tail.
+ Relation input = 1;
+
+ // (Required) the limit.
+ int32 limit = 2;
+}
+
+// Relation of type [[Aggregate]].
+message Aggregate {
+ // (Required) Input relation for a RelationalGroupedDataset.
+ Relation input = 1;
+
+ // (Required) How the RelationalGroupedDataset was built.
+ GroupType group_type = 2;
+
+ // (Required) Expressions for grouping keys
+ repeated Expression grouping_expressions = 3;
+
+ // (Required) List of values that will be translated to columns in the output DataFrame.
+ repeated Expression aggregate_expressions = 4;
+
+ // (Optional) Pivots a column of the current `DataFrame` and performs the specified aggregation.
+ Pivot pivot = 5;
+
+ // (Optional) List of values that will be translated to columns in the output DataFrame.
+ repeated GroupingSets grouping_sets = 6;
+
+ enum GroupType {
+ GROUP_TYPE_UNSPECIFIED = 0;
+ GROUP_TYPE_GROUPBY = 1;
+ GROUP_TYPE_ROLLUP = 2;
+ GROUP_TYPE_CUBE = 3;
+ GROUP_TYPE_PIVOT = 4;
+ GROUP_TYPE_GROUPING_SETS = 5;
+ }
+
+ message Pivot {
+ // (Required) The column to pivot
+ Expression col = 1;
+
+ // (Optional) List of values that will be translated to columns in the output DataFrame.
+ //
+ // Note that if it is empty, the server side will immediately trigger a job to collect
+ // the distinct values of the column.
+ repeated Expression.Literal values = 2;
+ }
+
+ message GroupingSets {
+ // (Required) Individual grouping set
+ repeated Expression grouping_set = 1;
+ }
+}
+
+// Relation of type [[Sort]].
+message Sort {
+ // (Required) Input relation for a Sort.
+ Relation input = 1;
+
+ // (Required) The ordering expressions
+ repeated Expression.SortOrder order = 2;
+
+ // (Optional) if this is a global sort.
+ optional bool is_global = 3;
+}
+
+
+// Drop specified columns.
+message Drop {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Optional) columns to drop.
+ repeated Expression columns = 2;
+
+ // (Optional) names of columns to drop.
+ repeated string column_names = 3;
+}
+
+
+// Relation of type [[Deduplicate]] which have duplicate rows removed, could
consider either only
+// the subset of columns or all the columns.
+message Deduplicate {
+ // (Required) Input relation for a Deduplicate.
+ Relation input = 1;
+
+ // (Optional) Deduplicate based on a list of column names.
+ //
+ // This field does not co-use with `all_columns_as_keys`.
+ repeated string column_names = 2;
+
+ // (Optional) Deduplicate based on all the columns of the input relation.
+ //
+ // This field does not co-use with `column_names`.
+ optional bool all_columns_as_keys = 3;
+
+ // (Optional) Deduplicate within the time range of watermark.
+ optional bool within_watermark = 4;
+}
+
+// A relation that does not need to be qualified by name.
+message LocalRelation {
+ // (Optional) Local collection data serialized into Arrow IPC streaming
format which contains
+ // the schema of the data.
+ optional bytes data = 1;
+
+ // (Optional) The schema of local data.
+ // It should be either a DDL-formatted type string or a JSON string.
+ //
+ // The server side will update the column names and data types according to
this schema.
+ // If the 'data' is not provided, then this schema will be required.
+ optional string schema = 2;
+}
+
+// A local relation that has been cached already.
+message CachedLocalRelation {
+ // `userId` and `sessionId` fields are deleted since the server must always use the active
+ // session/user rather than arbitrary values provided by the client. It is never valid to access
+ // a local relation from a different session/user.
+ reserved 1, 2;
+ reserved "userId", "sessionId";
+
+ // (Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation.
+ string hash = 3;
+}
+
+// Represents a remote relation that has been cached on server.
+message CachedRemoteRelation {
+ // (Required) ID of the remote related (assigned by the service).
+ string relation_id = 1;
+}
+
+// Relation of type [[Sample]] that samples a fraction of the dataset.
+message Sample {
+ // (Required) Input relation for a Sample.
+ Relation input = 1;
+
+ // (Required) lower bound.
+ double lower_bound = 2;
+
+ // (Required) upper bound.
+ double upper_bound = 3;
+
+ // (Optional) Whether to sample with replacement.
+ optional bool with_replacement = 4;
+
+ // (Required) The random seed.
+ // This field is required to avoid generating mutable dataframes (see SPARK-48184 for details),
+ // however, still keep it 'optional' here for backward compatibility.
+ optional int64 seed = 5;
+
+ // (Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it.
+ // This flag is true when invoking `dataframe.randomSplit` to randomly splits DataFrame with the
+ // provided weights. Otherwise, it is false.
+ bool deterministic_order = 6;
+}
+
+// Relation of type [[Range]] that generates a sequence of integers.
+message Range {
+ // (Optional) Default value = 0
+ optional int64 start = 1;
+
+ // (Required)
+ int64 end = 2;
+
+ // (Required)
+ int64 step = 3;
+
+ // Optional. Default value is assigned by 1) SQL conf
"spark.sql.leafNodeDefaultParallelism" if
+ // it is set, or 2) spark default parallelism.
+ optional int32 num_partitions = 4;
+}
+
+// Relation alias.
+message SubqueryAlias {
+ // (Required) The input relation of SubqueryAlias.
+ Relation input = 1;
+
+ // (Required) The alias.
+ string alias = 2;
+
+ // (Optional) Qualifier of the alias.
+ repeated string qualifier = 3;
+}
+
+// Relation repartition.
+message Repartition {
+ // (Required) The input relation of Repartition.
+ Relation input = 1;
+
+ // (Required) Must be positive.
+ int32 num_partitions = 2;
+
+ // (Optional) Default value is false.
+ optional bool shuffle = 3;
+}
+
+// Compose the string representing rows for output.
+// It will invoke 'Dataset.showString' to compute the results.
+message ShowString {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) Number of rows to show.
+ int32 num_rows = 2;
+
+ // (Required) If set to more than 0, truncates strings to
+ // `truncate` characters and all cells will be aligned right.
+ int32 truncate = 3;
+
+ // (Required) If set to true, prints output rows vertically (one line per
column value).
+ bool vertical = 4;
+}
+
+// Compose the string representing rows for output.
+// It will invoke 'Dataset.htmlString' to compute the results.
+message HtmlString {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) Number of rows to show.
+ int32 num_rows = 2;
+
+ // (Required) If set to more than 0, truncates strings to
+ // `truncate` characters and all cells will be aligned right.
+ int32 truncate = 3;
+}
+
+// Computes specified statistics for numeric and string columns.
+// It will invoke 'Dataset.summary' (same as 'StatFunctions.summary')
+// to compute the results.
+message StatSummary {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Optional) Statistics from to be computed.
+ //
+ // Available statistics are:
+ // count
+ // mean
+ // stddev
+ // min
+ // max
+ // arbitrary approximate percentiles specified as a percentage (e.g. 75%)
+ // count_distinct
+ // approx_count_distinct
+ //
+ // If no statistics are given, this function computes 'count', 'mean', 'stddev', 'min',
+ // 'approximate quartiles' (percentiles at 25%, 50%, and 75%), and 'max'.
+ repeated string statistics = 2;
+}
+
+// Computes basic statistics for numeric and string columns, including count, mean, stddev, min,
+// and max. If no columns are given, this function computes statistics for all numerical or
+// string columns.
+message StatDescribe {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Optional) Columns to compute statistics on.
+ repeated string cols = 2;
+}
+
+// Computes a pair-wise frequency table of the given columns. Also known as a contingency table.
+// It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate')
+// to compute the results.
+message StatCrosstab {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The name of the first column.
+ //
+ // Distinct items will make the first item of each row.
+ string col1 = 2;
+
+ // (Required) The name of the second column.
+ //
+ // Distinct items will make the column names of the DataFrame.
+ string col2 = 3;
+}
+
+// Calculate the sample covariance of two numerical columns of a DataFrame.
+// It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to
compute the results.
+message StatCov {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The name of the first column.
+ string col1 = 2;
+
+ // (Required) The name of the second column.
+ string col2 = 3;
+}
+
+// Calculates the correlation of two columns of a DataFrame. Currently only
supports the Pearson
+// Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as
+// 'StatFunctions.pearsonCorrelation') to compute the results.
+message StatCorr {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The name of the first column.
+ string col1 = 2;
+
+ // (Required) The name of the second column.
+ string col2 = 3;
+
+ // (Optional) Default value is 'pearson'.
+ //
+ // Currently only supports the Pearson Correlation Coefficient.
+ optional string method = 4;
+}
+
+// Calculates the approximate quantiles of numerical columns of a DataFrame.
+// It will invoke 'Dataset.stat.approxQuantile' (same as
'StatFunctions.approxQuantile')
+// to compute the results.
+message StatApproxQuantile {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The names of the numerical columns.
+ repeated string cols = 2;
+
+ // (Required) A list of quantile probabilities.
+ //
+ // Each number must belong to [0, 1].
+ // For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
+ repeated double probabilities = 3;
+
+ // (Required) The relative target precision to achieve (greater than or equal to 0).
+ //
+ // If set to zero, the exact quantiles are computed, which could be very expensive.
+ // Note that values greater than 1 are accepted but give the same result as 1.
+ double relative_error = 4;
+}
+
+// Finding frequent items for columns, possibly with false positives.
+// It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems')
+// to compute the results.
+message StatFreqItems {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The names of the columns to search frequent items in.
+ repeated string cols = 2;
+
+ // (Optional) The minimum frequency for an item to be considered `frequent`.
+ // Should be greater than 1e-4.
+ optional double support = 3;
+}
+
+
+// Returns a stratified sample without replacement based on the fraction
+// given on each stratum.
+// It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems')
+// to compute the results.
+message StatSampleBy {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The column that defines strata.
+ Expression col = 2;
+
+ // (Required) Sampling fraction for each stratum.
+ //
+ // If a stratum is not specified, we treat its fraction as zero.
+ repeated Fraction fractions = 3;
+
+ // (Required) The random seed.
+ // This field is required to avoid generating mutable dataframes (see
SPARK-48184 for details),
+ // however, still keep it 'optional' here for backward compatibility.
+ optional int64 seed = 5;
+
+ message Fraction {
+ // (Required) The stratum.
+ Expression.Literal stratum = 1;
+
+ // (Required) The fraction value. Must be in [0, 1].
+ double fraction = 2;
+ }
+}
+
+
+// Replaces null values.
+// It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results.
+// Following 3 parameter combinations are supported:
+// 1, 'values' only contains 1 item, 'cols' is empty:
+// replaces null values in all type-compatible columns.
+// 2, 'values' only contains 1 item, 'cols' is not empty:
+// replaces null values in specified columns.
+// 3, 'values' contains more than 1 items, then 'cols' is required to have the same length:
+// replaces each specified column with corresponding value.
+message NAFill {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Optional) Optional list of column names to consider.
+ repeated string cols = 2;
+
+ // (Required) Values to replace null values with.
+ //
+ // Should contain at least 1 item.
+ // Only 4 data types are supported now: bool, long, double, string
+ repeated Expression.Literal values = 3;
+}
+
+
+// Drop rows containing null values.
+// It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to
compute the results.
+message NADrop {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Optional) Optional list of column names to consider.
+ //
+ // When it is empty, all the columns in the input relation will be
considered.
+ repeated string cols = 2;
+
+ // (Optional) The minimum number of non-null and non-NaN values required to keep.
+ //
+ // When not set, it is equivalent to the number of considered columns, which means
+ // a row will be kept only if all columns are non-null.
+ //
+ // 'how' options ('all', 'any') can be easily converted to this field:
+ // - 'all' -> set 'min_non_nulls' 1;
+ // - 'any' -> keep 'min_non_nulls' unset;
+ optional int32 min_non_nulls = 3;
+}
+
+
+// Replaces old values with the corresponding values.
+// It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace')
+// to compute the results.
+message NAReplace {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Optional) List of column names to consider.
+ //
+ // When it is empty, all the type-compatible columns in the input relation
will be considered.
+ repeated string cols = 2;
+
+ // (Optional) The value replacement mapping.
+ repeated Replacement replacements = 3;
+
+ message Replacement {
+ // (Required) The old value.
+ //
+ // Only 4 data types are supported now: null, bool, double, string.
+ Expression.Literal old_value = 1;
+
+ // (Required) The new value.
+ //
+ // Should be of the same data type with the old value.
+ Expression.Literal new_value = 2;
+ }
+}
+
+
+// Rename columns on the input relation by the same length of names.
+message ToDF {
+ // (Required) The input relation of RenameColumnsBySameLengthNames.
+ Relation input = 1;
+
+ // (Required)
+ //
+ // The number of columns of the input relation must be equal to the length
+ // of this field. If this is not true, an exception will be returned.
+ repeated string column_names = 2;
+}
+
+
+// Rename columns on the input relation by a map with name to name mapping.
+message WithColumnsRenamed {
+ // (Required) The input relation.
+ Relation input = 1;
+
+
+ // (Optional)
+ //
+ // Renaming column names of input relation from A to B where A is the map key
+ // and B is the map value. This is a no-op if the schema doesn't contain any A. It
+ // does not require all input relation column names to be present as keys.
+ // Duplicated values of B are not allowed.
+ map<string, string> rename_columns_map = 2 [deprecated=true];
+
+ repeated Rename renames = 3;
+
+ message Rename {
+ // (Required) The existing column name.
+ string col_name = 1;
+
+ // (Required) The new column name.
+ string new_col_name = 2;
+ }
+}
+
+// Adding columns or replacing the existing columns that have the same names.
+message WithColumns {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required)
+ //
+ // Given a column name, apply the corresponding expression on the column. If the column
+ // name exists in the input relation, then replace the column. If the column name
+ // does not exist in the input relation, then add it as a new column.
+ //
+ // Only one name part is expected from each Expression.Alias.
+ //
+ // An exception is thrown when duplicated names are present in the mapping.
+ repeated Expression.Alias aliases = 2;
+}
+
+message WithWatermark {
+
+ // (Required) The input relation
+ Relation input = 1;
+
+ // (Required) Name of the column containing event time.
+ string event_time = 2;
+
+ // (Required)
+ string delay_threshold = 3;
+}
+
+// Specify a hint over a relation. Hint should have a name and optional parameters.
+message Hint {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) Hint name.
+ //
+ // Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, SHUFFLE_REPLICATE_NL.
+ //
+ // Supported partitioning hints include COALESCE, REPARTITION, REPARTITION_BY_RANGE.
+ string name = 2;
+
+ // (Optional) Hint parameters.
+ repeated Expression parameters = 3;
+}
+
+// Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set.
+message Unpivot {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) Id columns.
+ repeated Expression ids = 2;
+
+ // (Optional) Value columns to unpivot.
+ optional Values values = 3;
+
+ // (Required) Name of the variable column.
+ string variable_column_name = 4;
+
+ // (Required) Name of the value column.
+ string value_column_name = 5;
+
+ message Values {
+ repeated Expression values = 1;
+ }
+}
+
+message ToSchema {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The user provided schema.
+ //
+ // The Server side will update the dataframe with this schema.
+ DataType schema = 2;
+}
+
+message RepartitionByExpression {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) The partitioning expressions.
+ repeated Expression partition_exprs = 2;
+
+ // (Optional) number of partitions, must be positive.
+ optional int32 num_partitions = 3;
+}
+
+message MapPartitions {
+ // (Required) Input relation for a mapPartitions-equivalent API: mapInPandas, mapInArrow.
+ Relation input = 1;
+
+ // (Required) Input user-defined function.
+ CommonInlineUserDefinedFunction func = 2;
+
+ // (Optional) Whether to use barrier mode execution or not.
+ optional bool is_barrier = 3;
+
+ // (Optional) ResourceProfile id used for the stage level scheduling.
+ optional int32 profile_id = 4;
+}
+
+message GroupMap {
+ // (Required) Input relation for Group Map API: apply, applyInPandas.
+ Relation input = 1;
+
+ // (Required) Expressions for grouping keys.
+ repeated Expression grouping_expressions = 2;
+
+ // (Required) Input user-defined function.
+ CommonInlineUserDefinedFunction func = 3;
+
+ // (Optional) Expressions for sorting. Only used by Scala Sorted Group Map API.
+ repeated Expression sorting_expressions = 4;
+
+ // Below fields are only used by (Flat)MapGroupsWithState
+ // (Optional) Input relation for initial State.
+ Relation initial_input = 5;
+
+ // (Optional) Expressions for grouping keys of the initial state input relation.
+ repeated Expression initial_grouping_expressions = 6;
+
+ // (Optional) True if MapGroupsWithState, false if FlatMapGroupsWithState.
+ optional bool is_map_groups_with_state = 7;
+
+ // (Optional) The output mode of the function.
+ optional string output_mode = 8;
+
+ // (Optional) Timeout configuration for groups that do not receive data for a while.
+ optional string timeout_conf = 9;
+}
+
+message CoGroupMap {
+ // (Required) One input relation for CoGroup Map API - applyInPandas.
+ Relation input = 1;
+
+ // Expressions for grouping keys of the first input relation.
+ repeated Expression input_grouping_expressions = 2;
+
+ // (Required) The other input relation.
+ Relation other = 3;
+
+ // Expressions for grouping keys of the other input relation.
+ repeated Expression other_grouping_expressions = 4;
+
+ // (Required) Input user-defined function.
+ CommonInlineUserDefinedFunction func = 5;
+
+ // (Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API.
+ repeated Expression input_sorting_expressions = 6;
+
+ // (Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API.
+ repeated Expression other_sorting_expressions = 7;
+}
+
+message ApplyInPandasWithState {
+ // (Required) Input relation for applyInPandasWithState.
+ Relation input = 1;
+
+ // (Required) Expressions for grouping keys.
+ repeated Expression grouping_expressions = 2;
+
+ // (Required) Input user-defined function.
+ CommonInlineUserDefinedFunction func = 3;
+
+ // (Required) Schema for the output DataFrame.
+ string output_schema = 4;
+
+ // (Required) Schema for the state.
+ string state_schema = 5;
+
+ // (Required) The output mode of the function.
+ string output_mode = 6;
+
+ // (Required) Timeout configuration for groups that do not receive data for a while.
+ string timeout_conf = 7;
+}
+
+message CommonInlineUserDefinedTableFunction {
+ // (Required) Name of the user-defined table function.
+ string function_name = 1;
+
+ // (Optional) Whether the user-defined table function is deterministic.
+ bool deterministic = 2;
+
+ // (Optional) Function input arguments. Empty arguments are allowed.
+ repeated Expression arguments = 3;
+
+ // (Required) Type of the user-defined table function.
+ oneof function {
+ PythonUDTF python_udtf = 4;
+ }
+}
+
+message PythonUDTF {
+ // (Optional) Return type of the Python UDTF.
+ optional DataType return_type = 1;
+
+ // (Required) EvalType of the Python UDTF.
+ int32 eval_type = 2;
+
+ // (Required) The encoded commands of the Python UDTF.
+ bytes command = 3;
+
+ // (Required) Python version being used in the client.
+ string python_ver = 4;
+}
+
+message CommonInlineUserDefinedDataSource {
+ // (Required) Name of the data source.
+ string name = 1;
+
+ // (Required) The data source type.
+ oneof data_source {
+ PythonDataSource python_data_source = 2;
+ }
+}
+
+message PythonDataSource {
+ // (Required) The encoded commands of the Python data source.
+ bytes command = 1;
+
+ // (Required) Python version being used in the client.
+ string python_ver = 2;
+}
+
+// Collect arbitrary (named) metrics from a dataset.
+message CollectMetrics {
+ // (Required) The input relation.
+ Relation input = 1;
+
+ // (Required) Name of the metrics.
+ string name = 2;
+
+ // (Required) The metric sequence.
+ repeated Expression metrics = 3;
+}
+
+message Parse {
+ // (Required) Input relation to Parse. The input is expected to have a single text column.
+ Relation input = 1;
+ // (Required) The expected format of the text.
+ ParseFormat format = 2;
+
+ // (Optional) DataType representing the schema. If not set, Spark will infer the schema.
+ optional DataType schema = 3;
+
+ // Options for the csv/json parser. The map key is case insensitive.
+ map<string, string> options = 4;
+ enum ParseFormat {
+ PARSE_FORMAT_UNSPECIFIED = 0;
+ PARSE_FORMAT_CSV = 1;
+ PARSE_FORMAT_JSON = 2;
+ }
+}
+
+// Relation of type [[AsOfJoin]].
+//
+// `left` and `right` must be present.
+message AsOfJoin {
+ // (Required) Left input relation for a Join.
+ Relation left = 1;
+
+ // (Required) Right input relation for a Join.
+ Relation right = 2;
+
+ // (Required) Field to join on in left DataFrame
+ Expression left_as_of = 3;
+
+ // (Required) Field to join on in right DataFrame
+ Expression right_as_of = 4;
+
+ // (Optional) The join condition. Could be unset when `using_columns` is utilized.
+ //
+ // This field does not co-exist with using_columns.
+ Expression join_expr = 5;
+
+ // Optional. using_columns provides a list of columns that should be present on both sides of
+ // the join inputs that this Join will join on. For example A JOIN B USING col_name is
+ // equivalent to A JOIN B on A.col_name = B.col_name.
+ //
+ // This field does not co-exist with join_condition.
+ repeated string using_columns = 6;
+
+ // (Required) The join type.
+ string join_type = 7;
+
+ // (Optional) The asof tolerance within this range.
+ Expression tolerance = 8;
+
+ // (Required) Whether to allow matching with the same value or not.
+ bool allow_exact_matches = 9;
+
+ // (Required) Whether to search for prior, subsequent, or closest matches.
+ string direction = 10;
+}
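
For orientation, the relations above are ordinary protobuf messages, so a client constructs them with the generated builders. The Java sketch below is illustrative only: it assumes relations.proto carries the same java_package/java_multiple_files options as types.proto further down (so classes such as NADrop and Relation land in org.apache.kyuubi.shaded.spark.connect.proto), and it takes the input Relation as a parameter because the Relation definition sits outside this excerpt.

    // Illustrative sketch only; class locations assume the relocated java_package shown in types.proto.
    import org.apache.kyuubi.shaded.spark.connect.proto.NADrop;
    import org.apache.kyuubi.shaded.spark.connect.proto.Relation;

    public class NADropSketch {
        // Builds a NADrop over an existing relation: consider columns "age" and "name",
        // and keep a row only if it has at least 2 non-null/non-NaN values among them.
        // Leaving min_non_nulls unset would instead mean "drop if any considered column is null".
        static NADrop dropSparseRows(Relation input) {
            return NADrop.newBuilder()
                .setInput(input)
                .addCols("age")
                .addCols("name")
                .setMinNonNulls(2)
                .build();
        }
    }
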
diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/types.proto b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/types.proto
new file mode 100644
index 0000000..d6b4ffb
--- /dev/null
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/protobuf/spark/connect/types.proto
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.kyuubi.shaded.spark.connect.proto";
+option go_package = "internal/generated";
+
+// This message describes the logical [[DataType]] of something. It does not carry the value
+// itself but only describes it.
+message DataType {
+ oneof kind {
+ NULL null = 1;
+
+ Binary binary = 2;
+
+ Boolean boolean = 3;
+
+ // Numeric types
+ Byte byte = 4;
+ Short short = 5;
+ Integer integer = 6;
+ Long long = 7;
+
+ Float float = 8;
+ Double double = 9;
+ Decimal decimal = 10;
+
+ // String types
+ String string = 11;
+ Char char = 12;
+ VarChar var_char = 13;
+
+ // Datetime types
+ Date date = 14;
+ Timestamp timestamp = 15;
+ TimestampNTZ timestamp_ntz = 16;
+
+ // Interval types
+ CalendarInterval calendar_interval = 17;
+ YearMonthInterval year_month_interval = 18;
+ DayTimeInterval day_time_interval = 19;
+
+ // Complex types
+ Array array = 20;
+ Struct struct = 21;
+ Map map = 22;
+ Variant variant = 25;
+
+ // UserDefinedType
+ UDT udt = 23;
+
+ // UnparsedDataType
+ Unparsed unparsed = 24;
+ }
+
+ message Boolean {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Byte {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Short {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Integer {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Long {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Float {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Double {
+ uint32 type_variation_reference = 1;
+ }
+
+ message String {
+ uint32 type_variation_reference = 1;
+ string collation = 2;
+ }
+
+ message Binary {
+ uint32 type_variation_reference = 1;
+ }
+
+ message NULL {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Timestamp {
+ uint32 type_variation_reference = 1;
+ }
+
+ message Date {
+ uint32 type_variation_reference = 1;
+ }
+
+ message TimestampNTZ {
+ uint32 type_variation_reference = 1;
+ }
+
+ message CalendarInterval {
+ uint32 type_variation_reference = 1;
+ }
+
+ message YearMonthInterval {
+ optional int32 start_field = 1;
+ optional int32 end_field = 2;
+ uint32 type_variation_reference = 3;
+ }
+
+ message DayTimeInterval {
+ optional int32 start_field = 1;
+ optional int32 end_field = 2;
+ uint32 type_variation_reference = 3;
+ }
+
+ // Start compound types.
+ message Char {
+ int32 length = 1;
+ uint32 type_variation_reference = 2;
+ }
+
+ message VarChar {
+ int32 length = 1;
+ uint32 type_variation_reference = 2;
+ }
+
+ message Decimal {
+ optional int32 scale = 1;
+ optional int32 precision = 2;
+ uint32 type_variation_reference = 3;
+ }
+
+ message StructField {
+ string name = 1;
+ DataType data_type = 2;
+ bool nullable = 3;
+ optional string metadata = 4;
+ }
+
+ message Struct {
+ repeated StructField fields = 1;
+ uint32 type_variation_reference = 2;
+ }
+
+ message Array {
+ DataType element_type = 1;
+ bool contains_null = 2;
+ uint32 type_variation_reference = 3;
+ }
+
+ message Map {
+ DataType key_type = 1;
+ DataType value_type = 2;
+ bool value_contains_null = 3;
+ uint32 type_variation_reference = 4;
+ }
+
+ message Variant {
+ uint32 type_variation_reference = 1;
+ }
+
+ message UDT {
+ string type = 1;
+ optional string jvm_class = 2;
+ optional string python_class = 3;
+ optional string serialized_python_class = 4;
+ DataType sql_type = 5;
+ }
+
+ message Unparsed {
+ // (Required) The unparsed data type string
+ string data_type_string = 1;
+ }
+}
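
As a quick illustration of the DataType message above, here is a hedged Java sketch that builds the schema struct<name: string, age: int> with the relocated classes; method names follow the standard protobuf-java builder conventions generated from this file, and the snippet is not part of the patch itself.

    // Illustrative sketch only: builds struct<name: string NOT NULL, age: int> as a DataType message.
    import org.apache.kyuubi.shaded.spark.connect.proto.DataType;

    public class DataTypeSketch {
        static DataType nameAgeStruct() {
            DataType stringType = DataType.newBuilder()
                .setString(DataType.String.newBuilder())    // oneof kind = string
                .build();
            DataType intType = DataType.newBuilder()
                .setInteger(DataType.Integer.newBuilder())  // oneof kind = integer
                .build();
            return DataType.newBuilder()
                .setStruct(DataType.Struct.newBuilder()
                    .addFields(DataType.StructField.newBuilder()
                        .setName("name").setDataType(stringType).setNullable(false))
                    .addFields(DataType.StructField.newBuilder()
                        .setName("age").setDataType(intType).setNullable(true)))
                .build();
        }
    }
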
diff --git a/kyuubi-relocated-spark-connect-rpc/src/main/resources/META-INF/NOTICE b/kyuubi-relocated-spark-connect-rpc/src/main/resources/META-INF/NOTICE
new file mode 100644
index 0000000..b65f4e7
--- /dev/null
+++ b/kyuubi-relocated-spark-connect-rpc/src/main/resources/META-INF/NOTICE
@@ -0,0 +1,8 @@
+kyuubi-relocated-spark-connect-rpc
+Copyright 2023-2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (https://www.apache.org/).
+
+Apache Spark
+Copyright 2014 and onwards The Apache Software Foundation.
diff --git a/pom.xml b/pom.xml
index fef65db..ad18d71 100644
--- a/pom.xml
+++ b/pom.xml
@@ -54,6 +54,7 @@
<module>kyuubi-relocated-force-shading</module>
<module>kyuubi-relocated-hive-metastore-client</module>
<module>kyuubi-relocated-hive-service-rpc</module>
+ <module>kyuubi-relocated-spark-connect-rpc</module>
<module>kyuubi-relocated-thrift</module>
<module>kyuubi-relocated-zookeeper-parent</module>
</modules>
@@ -97,6 +98,7 @@
<maven.plugin.surefire.version>3.0.0-M8</maven.plugin.surefire.version>
<maven.plugin.spotless.version>2.30.0</maven.plugin.spotless.version>
<maven.plugin.jacoco.version>0.8.7</maven.plugin.jacoco.version>
+ <maven.plugin.os.version>1.7.1</maven.plugin.os.version>
<maven.plugin.shade.version>3.4.1</maven.plugin.shade.version>
<!-- Needed for consistent times -->
@@ -457,6 +459,13 @@
</executions>
</plugin>
</plugins>
+ <extensions>
+ <extension>
+ <groupId>kr.motd.maven</groupId>
+ <artifactId>os-maven-plugin</artifactId>
+ <version>${maven.plugin.os.version}</version>
+ </extension>
+ </extensions>
</build>
<profiles>
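
One note on the build change above: registering kr.motd.maven:os-maven-plugin as a build extension exposes the ${os.detected.classifier} property, which is the usual way a platform-specific protoc binary gets resolved when a protobuf compilation plugin runs; the compilation plugin configuration itself is outside this excerpt, so that pairing is an assumption here. Once the module is built, the relocated classes behave like any other protobuf-java messages. A minimal, purely illustrative round trip, assuming the standard com.google.protobuf runtime is on the classpath:

    // Illustrative only; if the protobuf runtime is also relocated by this module,
    // the com.google.protobuf import below would change accordingly.
    import com.google.protobuf.InvalidProtocolBufferException;
    import org.apache.kyuubi.shaded.spark.connect.proto.DataType;

    public class RoundTripSketch {
        public static void main(String[] args) throws InvalidProtocolBufferException {
            DataType original = DataType.newBuilder()
                .setString(DataType.String.newBuilder())
                .build();
            byte[] wire = original.toByteArray();        // serialize with the relocated class
            DataType parsed = DataType.parseFrom(wire);  // parse it back
            System.out.println(parsed.hasString());      // prints: true
        }
    }
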