This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ad503ca70ac [SPARK-41369][CONNECT][BUILD] Split connect project into common and server projects ad503ca70ac is described below commit ad503ca70acd3d5e3d815efac6f675b14797ded1 Author: vicennial <venkata.gud...@databricks.com> AuthorDate: Tue Dec 6 17:06:31 2022 -0400 [SPARK-41369][CONNECT][BUILD] Split connect project into common and server projects ### What changes were proposed in this pull request? We split the current `connector/connect` project into two projects: - `connector/connect/common`: this contains the proto definitions, and build targets for protobuf-related code generation. In the future this can also contain utilities that are shared between the server and the client. - `connector/connect/server`: this contains the code for the connect server and plugin (all the current Scala code). This includes the DSL because it is used for server-related tests. In the future we might replace the DSL with the Scala client. ### Why are the changes needed? In preparation for the Spark Connect Scala Client we need to split the server and the proto files. This, together with another refactoring in Catalyst, will allow us to create a client with a minimal set of dependencies. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #38944 from hvanhovell/refactorConnect-finalShade. 
Lead-authored-by: vicennial <venkata.gud...@databricks.com> Co-authored-by: Herman van Hovell <her...@databricks.com> Signed-off-by: Herman van Hovell <her...@databricks.com> --- connector/connect/common/pom.xml | 225 +++++++++++++++++++++ .../connect/{ => common}/src/main/buf.gen.yaml | 0 .../connect/{ => common}/src/main/buf.work.yaml | 0 .../{ => common}/src/main/protobuf/buf.yaml | 0 .../src/main/protobuf/spark/connect/base.proto | 0 .../src/main/protobuf/spark/connect/commands.proto | 0 .../main/protobuf/spark/connect/expressions.proto | 0 .../main/protobuf/spark/connect/relations.proto | 0 .../src/main/protobuf/spark/connect/types.proto | 0 connector/connect/dev/generate_protos.sh | 2 +- connector/connect/{ => server}/pom.xml | 85 ++------ .../spark/sql/connect/SparkConnectPlugin.scala | 0 .../apache/spark/sql/connect/config/Connect.scala | 0 .../org/apache/spark/sql/connect/dsl/package.scala | 0 .../connect/planner/DataTypeProtoConverter.scala | 0 .../sql/connect/planner/SparkConnectPlanner.scala | 0 .../service/SparkConnectInterceptorRegistry.scala | 0 .../sql/connect/service/SparkConnectService.scala | 0 .../service/SparkConnectStreamHandler.scala | 0 .../src/test/resources/log4j2.properties | 0 .../messages/ConnectProtoMessagesSuite.scala | 0 .../connect/planner/SparkConnectPlannerSuite.scala | 0 .../connect/planner/SparkConnectProtoSuite.scala | 0 .../connect/planner/SparkConnectServiceSuite.scala | 0 .../connect/service/InterceptorRegistrySuite.scala | 0 pom.xml | 3 +- project/SparkBuild.scala | 114 ++++++++--- python/pyspark/testing/connectutils.py | 2 +- 28 files changed, 332 insertions(+), 99 deletions(-) diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml new file mode 100644 index 00000000000..555afd5bc44 --- /dev/null +++ b/connector/connect/common/pom.xml @@ -0,0 +1,225 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license 
agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.spark</groupId> + <artifactId>spark-parent_2.12</artifactId> + <version>3.4.0-SNAPSHOT</version> + <relativePath>../../../pom.xml</relativePath> + </parent> + + <artifactId>spark-connect-common_2.12</artifactId> + <packaging>jar</packaging> + <name>Spark Project Connect Common</name> + <url>https://spark.apache.org/</url> + <properties> + <sbt.project.name>connect-common</sbt.project.name> + <guava.version>31.0.1-jre</guava.version> + <guava.failureaccess.version>1.0.1</guava.failureaccess.version> + <io.grpc.version>1.47.0</io.grpc.version> + <tomcat.annotations.api.version>6.0.53</tomcat.annotations.api.version> + </properties> + <dependencies> + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-library</artifactId> + </dependency> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>${guava.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>com.google.guava</groupId> + 
<artifactId>failureaccess</artifactId> + <version>${guava.failureaccess.version}</version> + </dependency> + <dependency> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java</artifactId> + <version>${protobuf.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-netty</artifactId> + <version>${io.grpc.version}</version> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-protobuf</artifactId> + <version>${io.grpc.version}</version> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-services</artifactId> + <version>${io.grpc.version}</version> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-stub</artifactId> + <version>${io.grpc.version}</version> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-codec-http2</artifactId> + <version>${netty.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-handler-proxy</artifactId> + <version>${netty.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-transport-native-unix-common</artifactId> + <version>${netty.version}</version> + <scope>provided</scope> + </dependency> + <dependency> <!-- necessary for Java 9+ --> + <groupId>org.apache.tomcat</groupId> + <artifactId>annotations-api</artifactId> + <version>${tomcat.annotations.api.version}</version> + <scope>provided</scope> + </dependency> + <!-- + This spark-tags test-dep is needed even though it isn't used in this module, + otherwise testing-cmds that exclude them will yield errors. 
+ --> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-tags_${scala.binary.version}</artifactId> + <type>test-jar</type> + <scope>test</scope> + </dependency> + </dependencies> + <build> + <!-- Protobuf compilation for Spark Connect --> + <extensions> + <extension> + <groupId>kr.motd.maven</groupId> + <artifactId>os-maven-plugin</artifactId> + <version>1.6.2</version> + </extension> + </extensions> + <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> + <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> + <plugins> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <executions> + <execution> + <id>add-sources</id> + <phase>generate-sources</phase> + <goals> + <goal>add-source</goal> + </goals> + <configuration> + <sources> + <source>src/main/scala-${scala.binary.version}</source> + </sources> + </configuration> + </execution> + <execution> + <id>add-scala-test-sources</id> + <phase>generate-test-sources</phase> + <goals> + <goal>add-test-source</goal> + </goals> + <configuration> + <sources> + <source>src/test/gen-java</source> + </sources> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + <profiles> + <profile> + <id>default-protoc</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> + <build> + <plugins> + <!-- Add protobuf-maven-plugin and provide ScalaPB as a code generation plugin --> + <plugin> + <groupId>org.xolstice.maven.plugins</groupId> + <artifactId>protobuf-maven-plugin</artifactId> + <version>0.6.1</version> + <configuration> + <protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact> + <pluginId>grpc-java</pluginId> + <pluginArtifact>io.grpc:protoc-gen-grpc-java:${io.grpc.version}:exe:${os.detected.classifier}</pluginArtifact> + <protoSourceRoot>src/main/protobuf</protoSourceRoot> + 
</configuration> + <executions> + <execution> + <goals> + <goal>compile</goal> + <goal>compile-custom</goal> + <goal>test-compile</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + <profile> + <id>user-defined-protoc</id> + <properties> + <connect.protoc.executable.path>${env.CONNECT_PROTOC_EXEC_PATH}</connect.protoc.executable.path> + <connect.plugin.executable.path>${env.CONNECT_PLUGIN_EXEC_PATH}</connect.plugin.executable.path> + </properties> + <build> + <plugins> + <plugin> + <groupId>org.xolstice.maven.plugins</groupId> + <artifactId>protobuf-maven-plugin</artifactId> + <version>0.6.1</version> + <configuration> + <protocExecutable>${connect.protoc.executable.path}</protocExecutable> + <pluginId>grpc-java</pluginId> + <pluginExecutable>${connect.plugin.executable.path}</pluginExecutable> + <protoSourceRoot>src/main/protobuf</protoSourceRoot> + </configuration> + <executions> + <execution> + <goals> + <goal>compile</goal> + <goal>compile-custom</goal> + <goal>test-compile</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> +</project> diff --git a/connector/connect/src/main/buf.gen.yaml b/connector/connect/common/src/main/buf.gen.yaml similarity index 100% rename from connector/connect/src/main/buf.gen.yaml rename to connector/connect/common/src/main/buf.gen.yaml diff --git a/connector/connect/src/main/buf.work.yaml b/connector/connect/common/src/main/buf.work.yaml similarity index 100% rename from connector/connect/src/main/buf.work.yaml rename to connector/connect/common/src/main/buf.work.yaml diff --git a/connector/connect/src/main/protobuf/buf.yaml b/connector/connect/common/src/main/protobuf/buf.yaml similarity index 100% rename from connector/connect/src/main/protobuf/buf.yaml rename to connector/connect/common/src/main/protobuf/buf.yaml diff --git a/connector/connect/src/main/protobuf/spark/connect/base.proto 
b/connector/connect/common/src/main/protobuf/spark/connect/base.proto similarity index 100% rename from connector/connect/src/main/protobuf/spark/connect/base.proto rename to connector/connect/common/src/main/protobuf/spark/connect/base.proto diff --git a/connector/connect/src/main/protobuf/spark/connect/commands.proto b/connector/connect/common/src/main/protobuf/spark/connect/commands.proto similarity index 100% rename from connector/connect/src/main/protobuf/spark/connect/commands.proto rename to connector/connect/common/src/main/protobuf/spark/connect/commands.proto diff --git a/connector/connect/src/main/protobuf/spark/connect/expressions.proto b/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto similarity index 100% rename from connector/connect/src/main/protobuf/spark/connect/expressions.proto rename to connector/connect/common/src/main/protobuf/spark/connect/expressions.proto diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto similarity index 100% rename from connector/connect/src/main/protobuf/spark/connect/relations.proto rename to connector/connect/common/src/main/protobuf/spark/connect/relations.proto diff --git a/connector/connect/src/main/protobuf/spark/connect/types.proto b/connector/connect/common/src/main/protobuf/spark/connect/types.proto similarity index 100% rename from connector/connect/src/main/protobuf/spark/connect/types.proto rename to connector/connect/common/src/main/protobuf/spark/connect/types.proto diff --git a/connector/connect/dev/generate_protos.sh b/connector/connect/dev/generate_protos.sh index d327fdb0dac..38cb821a47c 100755 --- a/connector/connect/dev/generate_protos.sh +++ b/connector/connect/dev/generate_protos.sh @@ -36,7 +36,7 @@ if [[ $# -eq 1 ]]; then OUTPUT_PATH=$1 fi -pushd connector/connect/src/main +pushd connector/connect/common/src/main LICENSE=$(cat <<'EOF' # diff --git 
a/connector/connect/pom.xml b/connector/connect/server/pom.xml similarity index 83% rename from connector/connect/pom.xml rename to connector/connect/server/pom.xml index 7e44cae63bf..8cde8578144 100644 --- a/connector/connect/pom.xml +++ b/connector/connect/server/pom.xml @@ -23,7 +23,7 @@ <groupId>org.apache.spark</groupId> <artifactId>spark-parent_2.12</artifactId> <version>3.4.0-SNAPSHOT</version> - <relativePath>../../pom.xml</relativePath> + <relativePath>../../../pom.xml</relativePath> </parent> <artifactId>spark-connect_2.12</artifactId> @@ -51,6 +51,17 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-connect-common_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <exclusions> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + </exclusions> + </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> @@ -188,14 +199,6 @@ </dependencies> <build> - <!-- Protobuf compilation for Spark Connect --> - <extensions> - <extension> - <groupId>kr.motd.maven</groupId> - <artifactId>os-maven-plugin</artifactId> - <version>1.6.2</version> - </extension> - </extensions> <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> <plugins> @@ -349,68 +352,4 @@ </plugin> </plugins> </build> - <profiles> - <profile> - <id>default-protoc</id> - <activation> - <activeByDefault>true</activeByDefault> - </activation> - <build> - <plugins> - <!-- Add protobuf-maven-plugin and provide ScalaPB as a code generation plugin --> - <plugin> - <groupId>org.xolstice.maven.plugins</groupId> - <artifactId>protobuf-maven-plugin</artifactId> - <version>0.6.1</version> - <configuration> - 
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact> - <pluginId>grpc-java</pluginId> - <pluginArtifact>io.grpc:protoc-gen-grpc-java:${io.grpc.version}:exe:${os.detected.classifier}</pluginArtifact> - <protoSourceRoot>src/main/protobuf</protoSourceRoot> - </configuration> - <executions> - <execution> - <goals> - <goal>compile</goal> - <goal>compile-custom</goal> - <goal>test-compile</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> - </profile> - <profile> - <id>user-defined-protoc</id> - <properties> - <connect.protoc.executable.path>${env.CONNECT_PROTOC_EXEC_PATH}</connect.protoc.executable.path> - <connect.plugin.executable.path>${env.CONNECT_PLUGIN_EXEC_PATH}</connect.plugin.executable.path> - </properties> - <build> - <plugins> - <plugin> - <groupId>org.xolstice.maven.plugins</groupId> - <artifactId>protobuf-maven-plugin</artifactId> - <version>0.6.1</version> - <configuration> - <protocExecutable>${connect.protoc.executable.path}</protocExecutable> - <pluginId>grpc-java</pluginId> - <pluginExecutable>${connect.plugin.executable.path}</pluginExecutable> - <protoSourceRoot>src/main/protobuf</protoSourceRoot> - </configuration> - <executions> - <execution> - <goals> - <goal>compile</goal> - <goal>compile-custom</goal> - <goal>test-compile</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> - </profile> - </profiles> </project> diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala similarity index 100% rename from connector/connect/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/SparkConnectPlugin.scala diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala 
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala similarity index 100% rename from connector/connect/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala similarity index 100% rename from connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala similarity index 100% rename from connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/DataTypeProtoConverter.scala diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala similarity index 100% rename from connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala similarity index 100% rename from 
connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala similarity index 100% rename from connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala similarity index 100% rename from connector/connect/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala rename to connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala diff --git a/connector/connect/src/test/resources/log4j2.properties b/connector/connect/server/src/test/resources/log4j2.properties similarity index 100% rename from connector/connect/src/test/resources/log4j2.properties rename to connector/connect/server/src/test/resources/log4j2.properties diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala similarity index 100% rename from connector/connect/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala rename to connector/connect/server/src/test/scala/org/apache/spark/sql/connect/messages/ConnectProtoMessagesSuite.scala diff --git 
a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala similarity index 100% rename from connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala rename to connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala similarity index 100% rename from connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala rename to connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala similarity index 100% rename from connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala rename to connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala similarity index 100% rename from connector/connect/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala rename to connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/InterceptorRegistrySuite.scala diff --git a/pom.xml b/pom.xml index 9e56560cd74..b2e5979f467 100644 --- a/pom.xml +++ b/pom.xml @@ -100,7 
+100,8 @@ <module>connector/kafka-0-10-assembly</module> <module>connector/kafka-0-10-sql</module> <module>connector/avro</module> - <module>connector/connect</module> + <module>connector/connect/server</module> + <module>connector/connect/common</module> <module>connector/protobuf</module> <!-- See additional modules enabled by profiles below --> </modules> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 0f5c84bcc8e..e6a39714e61 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -52,6 +52,7 @@ object BuildCommons { val streamingProjects@Seq(streaming, streamingKafka010) = Seq("streaming", "streaming-kafka-0-10").map(ProjectRef(buildLocation, _)) + val connectCommon = ProjectRef(buildLocation, "connect-common") val connect = ProjectRef(buildLocation, "connect") val allProjects@Seq( @@ -59,7 +60,7 @@ object BuildCommons { ) = Seq( "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe", "tags", "sketch", "kvstore" - ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connect) + ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect) val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn, sparkGangliaLgpl, streamingKinesisAsl, @@ -403,7 +404,7 @@ object SparkBuild extends PomBuild { val mimaProjects = allProjects.filterNot { x => Seq( spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn, - unsafe, tags, tokenProviderKafka010, sqlKafka010, connect, protobuf + unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, protobuf ).contains(x) } @@ -444,6 +445,7 @@ object SparkBuild extends PomBuild { /* Hive console settings */ enable(Hive.settings)(hive) + enable(SparkConnectCommon.settings)(connectCommon) enable(SparkConnect.settings)(connect) /* Protobuf settings */ @@ -645,8 +647,7 @@ object Core { ) } - -object SparkConnect { +object SparkConnectCommon { 
import BuildCommons.protoVersion lazy val settings = Seq( @@ -701,6 +702,85 @@ object SparkConnect { } }, + (assembly / assemblyMergeStrategy) := { + case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard + // Drop all proto files that are not needed as artifacts of the build. + case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard + case _ => MergeStrategy.first + } + ) ++ { + val connectProtocExecPath = sys.props.get("connect.protoc.executable.path") + val connectPluginExecPath = sys.props.get("connect.plugin.executable.path") + if (connectProtocExecPath.isDefined && connectPluginExecPath.isDefined) { + Seq( + (Compile / PB.targets) := Seq( + PB.gens.java -> (Compile / sourceManaged).value, + PB.gens.plugin(name = "grpc-java", path = connectPluginExecPath.get) -> (Compile / sourceManaged).value + ), + PB.protocExecutable := file(connectProtocExecPath.get) + ) + } else { + Seq( + (Compile / PB.targets) := Seq( + PB.gens.java -> (Compile / sourceManaged).value, + PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value + ) + ) + } + } +} + +object SparkConnect { + import BuildCommons.protoVersion + + lazy val settings = Seq( + // For some reason the resolution from the imported Maven build does not work for some + // of these dependencies that we need to shade later on. 
+ libraryDependencies ++= { + val guavaVersion = + SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String] + val guavaFailureaccessVersion = + SbtPomKeys.effectivePom.value.getProperties.get( + "guava.failureaccess.version").asInstanceOf[String] + Seq( + "io.grpc" % "protoc-gen-grpc-java" % BuildCommons.gprcVersion asProtocPlugin(), + "com.google.guava" % "guava" % guavaVersion, + "com.google.guava" % "failureaccess" % guavaFailureaccessVersion, + "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf" + ) + }, + + dependencyOverrides ++= { + val guavaVersion = + SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String] + val guavaFailureaccessVersion = + SbtPomKeys.effectivePom.value.getProperties.get( + "guava.failureaccess.version").asInstanceOf[String] + Seq( + "com.google.guava" % "guava" % guavaVersion, + "com.google.guava" % "failureaccess" % guavaFailureaccessVersion, + "com.google.protobuf" % "protobuf-java" % protoVersion + ) + }, + + (assembly / test) := { }, + + (assembly / logLevel) := Level.Info, + + // Exclude `scala-library` from assembly. + (assembly / assemblyPackageScala / assembleArtifact) := false, + + // Exclude `pmml-model-*.jar`, `scala-collection-compat_*.jar`,`jsr305-*.jar` and + // `netty-*.jar` and `unused-1.0.0.jar` from assembly. 
+ (assembly / assemblyExcludedJars) := { + val cp = (assembly / fullClasspath).value + cp filter { v => + val name = v.data.getName + name.startsWith("pmml-model-") || name.startsWith("scala-collection-compat_") || + name.startsWith("jsr305-") || name.startsWith("netty-") || name == "unused-1.0.0.jar" + } + }, + (assembly / assemblyShadeRules) := Seq( ShadeRule.rename("io.grpc.**" -> "org.sparkproject.connect.grpc.@0").inAll, ShadeRule.rename("com.google.common.**" -> "org.sparkproject.connect.guava.@1").inAll, @@ -729,24 +809,12 @@ object SparkConnect { case _ => MergeStrategy.first } ) ++ { - val connectProtocExecPath = sys.props.get("connect.protoc.executable.path") - val connectPluginExecPath = sys.props.get("connect.plugin.executable.path") - if (connectProtocExecPath.isDefined && connectPluginExecPath.isDefined) { - Seq( - (Compile / PB.targets) := Seq( - PB.gens.java -> (Compile / sourceManaged).value, - PB.gens.plugin(name = "grpc-java", path = connectPluginExecPath.get) -> (Compile / sourceManaged).value - ), - PB.protocExecutable := file(connectProtocExecPath.get) - ) - } else { - Seq( - (Compile / PB.targets) := Seq( - PB.gens.java -> (Compile / sourceManaged).value, - PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value - ) + Seq( + (Compile / PB.targets) := Seq( + PB.gens.java -> (Compile / sourceManaged).value, + PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value ) - } + ) } } @@ -1256,10 +1324,10 @@ object Unidoc { (ScalaUnidoc / unidoc / unidocProjectFilter) := inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, - yarn, tags, streamingKafka010, sqlKafka010, connect, protobuf), + yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, protobuf), (JavaUnidoc / unidoc / unidocProjectFilter) := inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, - yarn, tags, streamingKafka010, sqlKafka010, connect, protobuf), + yarn, tags, streamingKafka010, sqlKafka010, 
connectCommon, connect, protobuf), (ScalaUnidoc / unidoc / unidocAllClasspaths) := { ignoreClasspaths((ScalaUnidoc / unidoc / unidocAllClasspaths).value) diff --git a/python/pyspark/testing/connectutils.py b/python/pyspark/testing/connectutils.py index 7f4250613cc..1979b6eb723 100644 --- a/python/pyspark/testing/connectutils.py +++ b/python/pyspark/testing/connectutils.py @@ -28,7 +28,7 @@ if have_pandas: from pyspark.sql.connect.plan import LogicalPlan from pyspark.sql.connect.session import SparkSession - connect_jar = search_jar("connector/connect", "spark-connect-assembly-", "spark-connect") + connect_jar = search_jar("connector/connect/server", "spark-connect-assembly-", "spark-connect") else: connect_jar = None --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org