This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ad503ca70ac [SPARK-41369][CONNECT][BUILD] Split connect project into
common and server projects
ad503ca70ac is described below
commit ad503ca70acd3d5e3d815efac6f675b14797ded1
Author: vicennial <[email protected]>
AuthorDate: Tue Dec 6 17:06:31 2022 -0400
[SPARK-41369][CONNECT][BUILD] Split connect project into common and server
projects
### What changes were proposed in this pull request?
We split the current `connector/connect` project into two projects:
- `connector/connect/common`: this contains the proto definitions, and
build targets for proto buf related code generation. In the future this can
also contain utilities that are shared between the server and the client.
- `connector/connect/server`: this contains the code for the connect server
and plugin (all the current scala code). This includes the dsl because it is
used for server related tests. In the future we might replace the DSL by the
scala client.
### Why are the changes needed?
In preparation for the Spark Connect Scala Client we need to split the
server and the proto files. This, together with another refactoring in
Catalyst, will allow us to create a client with a minimal set of dependencies.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing tests
Closes #38944 from hvanhovell/refactorConnect-finalShade.
Lead-authored-by: vicennial <[email protected]>
Co-authored-by: Herman van Hovell <[email protected]>
Signed-off-by: Herman van Hovell <[email protected]>
---
connector/connect/common/pom.xml | 225 +++++++++++++++++++++
.../connect/{ => common}/src/main/buf.gen.yaml | 0
.../connect/{ => common}/src/main/buf.work.yaml | 0
.../{ => common}/src/main/protobuf/buf.yaml | 0
.../src/main/protobuf/spark/connect/base.proto | 0
.../src/main/protobuf/spark/connect/commands.proto | 0
.../main/protobuf/spark/connect/expressions.proto | 0
.../main/protobuf/spark/connect/relations.proto | 0
.../src/main/protobuf/spark/connect/types.proto | 0
connector/connect/dev/generate_protos.sh | 2 +-
connector/connect/{ => server}/pom.xml | 85 ++------
.../spark/sql/connect/SparkConnectPlugin.scala | 0
.../apache/spark/sql/connect/config/Connect.scala | 0
.../org/apache/spark/sql/connect/dsl/package.scala | 0
.../connect/planner/DataTypeProtoConverter.scala | 0
.../sql/connect/planner/SparkConnectPlanner.scala | 0
.../service/SparkConnectInterceptorRegistry.scala | 0
.../sql/connect/service/SparkConnectService.scala | 0
.../service/SparkConnectStreamHandler.scala | 0
.../src/test/resources/log4j2.properties | 0
.../messages/ConnectProtoMessagesSuite.scala | 0
.../connect/planner/SparkConnectPlannerSuite.scala | 0
.../connect/planner/SparkConnectProtoSuite.scala | 0
.../connect/planner/SparkConnectServiceSuite.scala | 0
.../connect/service/InterceptorRegistrySuite.scala | 0
pom.xml | 3 +-
project/SparkBuild.scala | 114 ++++++++---
python/pyspark/testing/connectutils.py | 2 +-
28 files changed, 332 insertions(+), 99 deletions(-)
diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml
new file mode 100644
index 00000000000..555afd5bc44
--- /dev/null
+++ b/connector/connect/common/pom.xml
@@ -0,0 +1,225 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-parent_2.12</artifactId>
+ <version>3.4.0-SNAPSHOT</version>
+ <relativePath>../../../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>spark-connect-common_2.12</artifactId>
+ <packaging>jar</packaging>
+ <name>Spark Project Connect Common</name>
+ <url>https://spark.apache.org/</url>
+ <properties>
+ <sbt.project.name>connect-common</sbt.project.name>
+ <guava.version>31.0.1-jre</guava.version>
+ <guava.failureaccess.version>1.0.1</guava.failureaccess.version>
+ <io.grpc.version>1.47.0</io.grpc.version>
+ <tomcat.annotations.api.version>6.0.53</tomcat.annotations.api.version>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${guava.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>failureaccess</artifactId>
+ <version>${guava.failureaccess.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <version>${protobuf.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-netty</artifactId>
+ <version>${io.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-protobuf</artifactId>
+ <version>${io.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-services</artifactId>
+ <version>${io.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-stub</artifactId>
+ <version>${io.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-codec-http2</artifactId>
+ <version>${netty.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-handler-proxy</artifactId>
+ <version>${netty.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-unix-common</artifactId>
+ <version>${netty.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency> <!-- necessary for Java 9+ -->
+ <groupId>org.apache.tomcat</groupId>
+ <artifactId>annotations-api</artifactId>
+ <version>${tomcat.annotations.api.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <!--
+ This spark-tags test-dep is needed even though it isn't used in this
module,
+ otherwise testing-cmds that exclude them will yield errors.
+ -->
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-tags_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <!-- Protobuf compilation for Spark Connect -->
+ <extensions>
+ <extension>
+ <groupId>kr.motd.maven</groupId>
+ <artifactId>os-maven-plugin</artifactId>
+ <version>1.6.2</version>
+ </extension>
+ </extensions>
+
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>add-sources</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>add-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+
<source>src/main/scala-${scala.binary.version}</source>
+ </sources>
+ </configuration>
+ </execution>
+ <execution>
+ <id>add-scala-test-sources</id>
+ <phase>generate-test-sources</phase>
+ <goals>
+ <goal>add-test-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>src/test/gen-java</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ <profiles>
+ <profile>
+ <id>default-protoc</id>
+ <activation>
+ <activeByDefault>true</activeByDefault>
+ </activation>
+ <build>
+ <plugins>
+ <!-- Add protobuf-maven-plugin and provide ScalaPB as a
code generation plugin -->
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <version>0.6.1</version>
+ <configuration>
+
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
+ <pluginId>grpc-java</pluginId>
+
<pluginArtifact>io.grpc:protoc-gen-grpc-java:${io.grpc.version}:exe:${os.detected.classifier}</pluginArtifact>
+
<protoSourceRoot>src/main/protobuf</protoSourceRoot>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>compile</goal>
+ <goal>compile-custom</goal>
+ <goal>test-compile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ <profile>
+ <id>user-defined-protoc</id>
+ <properties>
+
<connect.protoc.executable.path>${env.CONNECT_PROTOC_EXEC_PATH}</connect.protoc.executable.path>
+
<connect.plugin.executable.path>${env.CONNECT_PLUGIN_EXEC_PATH}</connect.plugin.executable.path>
+ </properties>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <version>0.6.1</version>
+ <configuration>
+
<protocExecutable>${connect.protoc.executable.path}</protocExecutable>
+ <pluginId>grpc-java</pluginId>
+
<pluginExecutable>${connect.plugin.executable.path}</pluginExecutable>
+
<protoSourceRoot>src/main/protobuf</protoSourceRoot>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>compile</goal>
+ <goal>compile-custom</goal>
+ <goal>test-compile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+</project>
diff --git a/connector/connect/src/main/buf.gen.yaml
b/connector/connect/common/src/main/buf.gen.yaml
similarity index 100%
rename from connector/connect/src/main/buf.gen.yaml
rename to connector/connect/common/src/main/buf.gen.yaml
diff --git a/connector/connect/src/main/buf.work.yaml
b/connector/connect/common/src/main/buf.work.yaml
similarity index 100%
rename from connector/connect/src/main/buf.work.yaml
rename to connector/connect/common/src/main/buf.work.yaml
diff --git a/connector/connect/src/main/protobuf/buf.yaml
b/connector/connect/common/src/main/protobuf/buf.yaml
similarity index 100%
rename from connector/connect/src/main/protobuf/buf.yaml
rename to connector/connect/common/src/main/protobuf/buf.yaml
diff --git a/connector/connect/src/main/protobuf/spark/connect/base.proto
b/connector/connect/common/src/main/protobuf/spark/connect/base.proto
similarity index 100%
rename from connector/connect/src/main/protobuf/spark/connect/base.proto
rename to connector/connect/common/src/main/protobuf/spark/connect/base.proto
diff --git a/connector/connect/src/main/protobuf/spark/connect/commands.proto
b/connector/connect/common/src/main/protobuf/spark/connect/commands.proto
similarity index 100%
rename from connector/connect/src/main/protobuf/spark/connect/commands.proto
rename to
connector/connect/common/src/main/protobuf/spark/connect/commands.proto
diff --git
a/connector/connect/src/main/protobuf/spark/connect/expressions.proto
b/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto
similarity index 100%
rename from connector/connect/src/main/protobuf/spark/connect/expressions.proto
rename to
connector/connect/common/src/main/protobuf/spark/connect/expressions.proto
diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto
b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
similarity index 100%
rename from connector/connect/src/main/protobuf/spark/connect/relations.proto
rename to
connector/connect/common/src/main/protobuf/spark/connect/relations.proto
diff --git a/connector/connect/src/main/protobuf/spark/connect/types.proto
b/connector/connect/common/src/main/protobuf/spark/connect/types.proto
similarity index 100%
rename from connector/connect/src/main/protobuf/spark/connect/types.proto
rename to connector/connect/common/src/main/protobuf/spark/connect/types.proto
diff --git a/connector/connect/dev/generate_protos.sh
b/connector/connect/dev/generate_protos.sh
index d327fdb0dac..38cb821a47c 100755
--- a/connector/connect/dev/generate_protos.sh
+++ b/connector/connect/dev/generate_protos.sh
@@ -36,7 +36,7 @@ if [[ $# -eq 1 ]]; then
OUTPUT_PATH=$1
fi
-pushd connector/connect/src/main
+pushd connector/connect/common/src/main
LICENSE=$(cat <<'EOF'
#
diff --git a/connector/connect/pom.xml b/connector/connect/server/pom.xml
similarity index 83%
rename from connector/connect/pom.xml
rename to connector/connect/server/pom.xml
index 7e44cae63bf..8cde8578144 100644
--- a/connector/connect/pom.xml
+++ b/connector/connect/server/pom.xml
@@ -23,7 +23,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
<version>3.4.0-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
+ <relativePath>../../../pom.xml</relativePath>
</parent>
<artifactId>spark-connect_2.12</artifactId>
@@ -51,6 +51,17 @@
</exclusion>
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -188,14 +199,6 @@
</dependencies>
<build>
- <!-- Protobuf compilation for Spark Connect -->
- <extensions>
- <extension>
- <groupId>kr.motd.maven</groupId>
- <artifactId>os-maven-plugin</artifactId>
- <version>1.6.2</version>
- </extension>
- </extensions>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
@@ -349,68 +352,4 @@
</plugin>
</plugins>
</build>
- <profiles>
- <profile>
- <id>default-protoc</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
- <build>
- <plugins>
- <!-- Add protobuf-maven-plugin and provide ScalaPB as a code
generation plugin -->
- <plugin>
- <groupId>org.xolstice.maven.plugins</groupId>
- <artifactId>protobuf-maven-plugin</artifactId>
- <version>0.6.1</version>
- <configuration>
-
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
- <pluginId>grpc-java</pluginId>
-
<pluginArtifact>io.grpc:protoc-gen-grpc-java:${io.grpc.version}:exe:${os.detected.classifier}</pluginArtifact>
- <protoSourceRoot>src/main/protobuf</protoSourceRoot>
- </configuration>
- <executions>
- <execution>
- <goals>
- <goal>compile</goal>
- <goal>compile-custom</goal>
- <goal>test-compile</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>user-defined-protoc</id>
- <properties>
-
<connect.protoc.executable.path>${env.CONNECT_PROTOC_EXEC_PATH}</connect.protoc.executable.path>
-
<connect.plugin.executable.path>${env.CONNECT_PLUGIN_EXEC_PATH}</connect.plugin.executable.path>
- </properties>
- <build>
- <plugins>
- <plugin>
- <groupId>org.xolstice.maven.plugins</groupId>
- <artifactId>protobuf-maven-plugin</artifactId>
- <version>0.6.1</version>
- <configuration>
-
<protocExecutable>${connect.protoc.executable.path}</protocExecutable>
- <pluginId>grpc-java</pluginId>
-
<pluginExecutable>${connect.plugin.executable.path}</pluginExecutable>
- <protoSourceRoot>src/main/protobuf</protoSourceRoot>
- </configuration>
- <executions>
- <execution>
- <goals>
- <goal>compile</goal>
- <goal>compile-custom</goal>
- <goal>test-compile</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
</project>
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala
diff --git
a/connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala
similarity index 100%
rename from
connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala
rename to
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala
diff --git a/connector/connect/src/test/resources/log4j2.properties
b/connector/connect/server/src/test/resources/log4j2.properties
similarity index 100%
rename from connector/connect/src/test/resources/log4j2.properties
rename to connector/connect/server/src/test/resources/log4j2.properties
diff --git
a/connector/connect/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala
similarity index 100%
rename from
connector/connect/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala
rename to
connector/connect/server/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala
diff --git
a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala
similarity index 100%
rename from
connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala
rename to
connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala
diff --git
a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
similarity index 100%
rename from
connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
rename to
connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala
diff --git
a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
similarity index 100%
rename from
connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
rename to
connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
diff --git
a/connector/connect/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala
similarity index 100%
rename from
connector/connect/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala
rename to
connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala
diff --git a/pom.xml b/pom.xml
index 9e56560cd74..b2e5979f467 100644
--- a/pom.xml
+++ b/pom.xml
@@ -100,7 +100,8 @@
<module>connector/kafka-0-10-assembly</module>
<module>connector/kafka-0-10-sql</module>
<module>connector/avro</module>
- <module>connector/connect</module>
+ <module>connector/connect/server</module>
+ <module>connector/connect/common</module>
<module>connector/protobuf</module>
<!-- See additional modules enabled by profiles below -->
</modules>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 0f5c84bcc8e..e6a39714e61 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -52,6 +52,7 @@ object BuildCommons {
val streamingProjects@Seq(streaming, streamingKafka010) =
Seq("streaming", "streaming-kafka-0-10").map(ProjectRef(buildLocation, _))
+ val connectCommon = ProjectRef(buildLocation, "connect-common")
val connect = ProjectRef(buildLocation, "connect")
val allProjects@Seq(
@@ -59,7 +60,7 @@ object BuildCommons {
) = Seq(
"core", "graphx", "mllib", "mllib-local", "repl", "network-common",
"network-shuffle", "launcher", "unsafe",
"tags", "sketch", "kvstore"
- ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++
Seq(connect)
+ ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++
Seq(connectCommon, connect)
val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn,
sparkGangliaLgpl, streamingKinesisAsl,
@@ -403,7 +404,7 @@ object SparkBuild extends PomBuild {
val mimaProjects = allProjects.filterNot { x =>
Seq(
spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle,
networkYarn,
- unsafe, tags, tokenProviderKafka010, sqlKafka010, connect, protobuf
+ unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon,
connect, protobuf
).contains(x)
}
@@ -444,6 +445,7 @@ object SparkBuild extends PomBuild {
/* Hive console settings */
enable(Hive.settings)(hive)
+ enable(SparkConnectCommon.settings)(connectCommon)
enable(SparkConnect.settings)(connect)
/* Protobuf settings */
@@ -645,8 +647,7 @@ object Core {
)
}
-
-object SparkConnect {
+object SparkConnectCommon {
import BuildCommons.protoVersion
lazy val settings = Seq(
@@ -701,6 +702,85 @@ object SparkConnect {
}
},
+ (assembly / assemblyMergeStrategy) := {
+ case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") =>
MergeStrategy.discard
+ // Drop all proto files that are not needed as artifacts of the build.
+ case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") =>
MergeStrategy.discard
+ case _ => MergeStrategy.first
+ }
+ ) ++ {
+ val connectProtocExecPath = sys.props.get("connect.protoc.executable.path")
+ val connectPluginExecPath = sys.props.get("connect.plugin.executable.path")
+ if (connectProtocExecPath.isDefined && connectPluginExecPath.isDefined) {
+ Seq(
+ (Compile / PB.targets) := Seq(
+ PB.gens.java -> (Compile / sourceManaged).value,
+ PB.gens.plugin(name = "grpc-java", path = connectPluginExecPath.get)
-> (Compile / sourceManaged).value
+ ),
+ PB.protocExecutable := file(connectProtocExecPath.get)
+ )
+ } else {
+ Seq(
+ (Compile / PB.targets) := Seq(
+ PB.gens.java -> (Compile / sourceManaged).value,
+ PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value
+ )
+ )
+ }
+ }
+}
+
+object SparkConnect {
+ import BuildCommons.protoVersion
+
+ lazy val settings = Seq(
+ // For some reason the resolution from the imported Maven build does not
work for some
+ // of these dependencies that we need to shade later on.
+ libraryDependencies ++= {
+ val guavaVersion =
+
SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String]
+ val guavaFailureaccessVersion =
+ SbtPomKeys.effectivePom.value.getProperties.get(
+ "guava.failureaccess.version").asInstanceOf[String]
+ Seq(
+ "io.grpc" % "protoc-gen-grpc-java" % BuildCommons.gprcVersion
asProtocPlugin(),
+ "com.google.guava" % "guava" % guavaVersion,
+ "com.google.guava" % "failureaccess" % guavaFailureaccessVersion,
+ "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf"
+ )
+ },
+
+ dependencyOverrides ++= {
+ val guavaVersion =
+
SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String]
+ val guavaFailureaccessVersion =
+ SbtPomKeys.effectivePom.value.getProperties.get(
+ "guava.failureaccess.version").asInstanceOf[String]
+ Seq(
+ "com.google.guava" % "guava" % guavaVersion,
+ "com.google.guava" % "failureaccess" % guavaFailureaccessVersion,
+ "com.google.protobuf" % "protobuf-java" % protoVersion
+ )
+ },
+
+ (assembly / test) := { },
+
+ (assembly / logLevel) := Level.Info,
+
+ // Exclude `scala-library` from assembly.
+ (assembly / assemblyPackageScala / assembleArtifact) := false,
+
+ // Exclude `pmml-model-*.jar`,
`scala-collection-compat_*.jar`,`jsr305-*.jar` and
+ // `netty-*.jar` and `unused-1.0.0.jar` from assembly.
+ (assembly / assemblyExcludedJars) := {
+ val cp = (assembly / fullClasspath).value
+ cp filter { v =>
+ val name = v.data.getName
+ name.startsWith("pmml-model-") ||
name.startsWith("scala-collection-compat_") ||
+ name.startsWith("jsr305-") || name.startsWith("netty-") || name ==
"unused-1.0.0.jar"
+ }
+ },
+
(assembly / assemblyShadeRules) := Seq(
ShadeRule.rename("io.grpc.**" ->
"org.sparkproject.connect.grpc.@0").inAll,
ShadeRule.rename("com.google.common.**" ->
"org.sparkproject.connect.guava.@1").inAll,
@@ -729,24 +809,12 @@ object SparkConnect {
case _ => MergeStrategy.first
}
) ++ {
- val connectProtocExecPath = sys.props.get("connect.protoc.executable.path")
- val connectPluginExecPath = sys.props.get("connect.plugin.executable.path")
- if (connectProtocExecPath.isDefined && connectPluginExecPath.isDefined) {
- Seq(
- (Compile / PB.targets) := Seq(
- PB.gens.java -> (Compile / sourceManaged).value,
- PB.gens.plugin(name = "grpc-java", path = connectPluginExecPath.get)
-> (Compile / sourceManaged).value
- ),
- PB.protocExecutable := file(connectProtocExecPath.get)
- )
- } else {
- Seq(
- (Compile / PB.targets) := Seq(
- PB.gens.java -> (Compile / sourceManaged).value,
- PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value
- )
+ Seq(
+ (Compile / PB.targets) := Seq(
+ PB.gens.java -> (Compile / sourceManaged).value,
+ PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value
)
- }
+ )
}
}
@@ -1256,10 +1324,10 @@ object Unidoc {
(ScalaUnidoc / unidoc / unidocProjectFilter) :=
inAnyProject -- inProjects(OldDeps.project, repl, examples, tools,
kubernetes,
- yarn, tags, streamingKafka010, sqlKafka010, connect, protobuf),
+ yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect,
protobuf),
(JavaUnidoc / unidoc / unidocProjectFilter) :=
inAnyProject -- inProjects(OldDeps.project, repl, examples, tools,
kubernetes,
- yarn, tags, streamingKafka010, sqlKafka010, connect, protobuf),
+ yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect,
protobuf),
(ScalaUnidoc / unidoc / unidocAllClasspaths) := {
ignoreClasspaths((ScalaUnidoc / unidoc / unidocAllClasspaths).value)
diff --git a/python/pyspark/testing/connectutils.py
b/python/pyspark/testing/connectutils.py
index 7f4250613cc..1979b6eb723 100644
--- a/python/pyspark/testing/connectutils.py
+++ b/python/pyspark/testing/connectutils.py
@@ -28,7 +28,7 @@ if have_pandas:
from pyspark.sql.connect.plan import LogicalPlan
from pyspark.sql.connect.session import SparkSession
- connect_jar = search_jar("connector/connect", "spark-connect-assembly-",
"spark-connect")
+ connect_jar = search_jar("connector/connect/server",
"spark-connect-assembly-", "spark-connect")
else:
connect_jar = None
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]