This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new b3276ef4e3a  [SPARK-41534][CONNECT][SQL] Setup initial client module for Spark Connect
b3276ef4e3a is described below

commit b3276ef4e3a275110c02e27e4b95dc3596b71cf4
Author: vicennial <venkata.gud...@databricks.com>
AuthorDate: Thu Dec 22 14:10:42 2022 -0400

    [SPARK-41534][CONNECT][SQL] Setup initial client module for Spark Connect

    ### What changes were proposed in this pull request?
    This PR sets up the client module (primarily the SBT and Maven builds) in Spark Connect, in preparation for the Scala/JVM client. A placeholder `SparkConnectClient` is added in this PR.

    ### Why are the changes needed?
    For the development of the Scala/JVM client for Spark Connect.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    Existing build CI plus a placeholder test.

    Closes #39078 from vicennial/clientModule.

    Lead-authored-by: vicennial <venkata.gud...@databricks.com>
    Co-authored-by: Venkata Sai Akhil Gudesa <gvs.akhil1...@gmail.com>
    Signed-off-by: Herman van Hovell <her...@databricks.com>
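For orientation, here is a minimal sketch of how the placeholder client added by this commit can be exercised; it mirrors the included `SparkConnectClientSuite`. At this stage the client only carries a `proto.UserContext` and no gRPC transport, so the `Example` object below is purely illustrative:

```scala
import org.apache.spark.connect.proto
import org.apache.spark.sql.connect.client.SparkConnectClient

// Illustrative only: exercises the placeholder builder introduced in this commit.
object Example {
  def main(args: Array[String]): Unit = {
    // Build a client whose UserContext carries a user id.
    val client = SparkConnectClient.builder().userId("abc123").build()
    assert(client.userId == "abc123")

    // The constructor can also be called directly with a UserContext, as the test suite does.
    val direct = new SparkConnectClient(proto.UserContext.newBuilder().setUserId("abc123").build())
    assert(direct.userId == "abc123")
  }
}
```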
---
 connector/connect/client/jvm/pom.xml              | 94 ++++++++++++++++++++++
 .../sql/connect/client/SparkConnectClient.scala   | 46 +++++++++++
 .../jvm/src/test/resources/log4j2.properties      | 39 +++++++++
 .../connect/client/SparkConnectClientSuite.scala  | 32 ++++++++
 pom.xml                                           |  1 +
 project/SparkBuild.scala                          | 70 +++++++++++++++-
 6 files changed, 278 insertions(+), 4 deletions(-)

diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
new file mode 100644
index 00000000000..39de7725de2
--- /dev/null
+++ b/connector/connect/client/jvm/pom.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.4.0-SNAPSHOT</version>
+    <relativePath>../../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-connect-client-jvm_2.12</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Connect Client</name>
+  <url>https://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>connect-client-jvm</sbt.project.name>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <version>${protobuf.version}</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <!-- Shade all Protobuf dependencies of this build -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <configuration>
+          <shadedArtifactAttached>false</shadedArtifactAttached>
+          <artifactSet>
+            <includes>
+              <include>com.google.protobuf:*</include>
+              <include>org.apache.spark:spark-connect-common_${scala.binary.version}</include>
+            </includes>
+          </artifactSet>
+          <relocations>
+            <relocation>
+              <pattern>com.google.protobuf</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.protobuf</shadedPattern>
+              <includes>
+                <include>com.google.protobuf.**</include>
+              </includes>
+            </relocation>
+          </relocations>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
\ No newline at end of file
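A quick note on the shade configuration above: `spark.shade.packageName` is `org.sparkproject` in the parent POM, so the protobuf classes bundled into the published client jar are expected to end up under `org.sparkproject.connect.protobuf` (matching the SBT `ShadeRule` later in this diff). A rough, illustrative way to sanity-check a built jar:

```scala
// Illustrative sanity check, not part of this commit: with only the shaded
// spark-connect-client-jvm jar (plus scala-library) on the classpath, the relocated
// protobuf classes should resolve, while the original com.google.protobuf package should not.
object ShadingCheck {
  def main(args: Array[String]): Unit = {
    // Present after relocation; throws ClassNotFoundException otherwise.
    Class.forName("org.sparkproject.connect.protobuf.Message")
    // Expected to be absent if shading worked; Try avoids an unhandled exception.
    val unshaded = scala.util.Try(Class.forName("com.google.protobuf.Message"))
    println(s"relocated protobuf found; unshaded protobuf present: ${unshaded.isSuccess}")
  }
}
```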
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
new file mode 100644
index 00000000000..beaae6412be
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connect.client
+
+import org.apache.spark.connect.proto
+
+class SparkConnectClient(private val userContext: proto.UserContext) {
+
+  /**
+   * Placeholder method.
+   * @return User ID.
+   */
+  def userId: String = userContext.getUserId()
+}
+
+object SparkConnectClient {
+  def builder(): Builder = new Builder()
+
+  class Builder() {
+    private val userContextBuilder = proto.UserContext.newBuilder()
+
+    def userId(id: String): Builder = {
+      userContextBuilder.setUserId(id)
+      this
+    }
+
+    def build(): SparkConnectClient = {
+      new SparkConnectClient(userContextBuilder.build())
+    }
+  }
+}
diff --git a/connector/connect/client/jvm/src/test/resources/log4j2.properties b/connector/connect/client/jvm/src/test/resources/log4j2.properties
new file mode 100644
index 00000000000..ab02104c696
--- /dev/null
+++ b/connector/connect/client/jvm/src/test/resources/log4j2.properties
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %t: %m%n%ex
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+logger.jetty.name = org.sparkproject.jetty
+logger.jetty.level = warn
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
new file mode 100644
index 00000000000..e0265bb210f
--- /dev/null
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connect.client
+
+import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
+import org.apache.spark.connect.proto
+
+class SparkConnectClientSuite extends AnyFunSuite { // scalastyle:ignore funsuite
+
+  private def createClient = {
+    new SparkConnectClient(proto.UserContext.newBuilder().build())
+  }
+
+  test("Placeholder test: Create SparkConnectClient") {
+    val client = SparkConnectClient.builder().userId("abc123").build()
+    assert(client.userId == "abc123")
+  }
+}
diff --git a/pom.xml b/pom.xml
index 2804d9e560f..2a9ad47066e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,6 +102,7 @@
     <module>connector/avro</module>
     <module>connector/connect/server</module>
     <module>connector/connect/common</module>
+    <module>connector/connect/client/jvm</module>
     <module>connector/protobuf</module>
     <!-- See additional modules enabled by profiles below -->
   </modules>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 556f8528ea1..cf9342b7777 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -54,13 +54,14 @@ object BuildCommons {
 
   val connectCommon = ProjectRef(buildLocation, "connect-common")
   val connect = ProjectRef(buildLocation, "connect")
+  val connectClient = ProjectRef(buildLocation, "connect-client-jvm")
 
   val allProjects@Seq(
     core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags,
    sketch, kvstore, _*
   ) = Seq(
     "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe",
     "tags", "sketch", "kvstore"
-  ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect)
+  ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect, connectClient)
 
   val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn, sparkGangliaLgpl, streamingKinesisAsl,
@@ -402,7 +403,7 @@ object SparkBuild extends PomBuild {
   val mimaProjects = allProjects.filterNot { x =>
     Seq(
       spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn,
-      unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, protobuf
+      unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf
     ).contains(x)
   }
@@ -445,6 +446,7 @@ object SparkBuild extends PomBuild {
 
   enable(SparkConnectCommon.settings)(connectCommon)
   enable(SparkConnect.settings)(connect)
+  enable(SparkConnectClient.settings)(connectClient)
 
   /* Protobuf settings */
   enable(SparkProtobuf.settings)(protobuf)
@@ -825,6 +827,62 @@ object SparkConnect {
   }
 }
 
+object SparkConnectClient {
+  import BuildCommons.protoVersion
+
+  lazy val settings = Seq(
+    // For some reason the resolution from the imported Maven build does not work for some
+    // of these dependencies that we need to shade later on.
+    libraryDependencies ++= {
+      Seq(
+        "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf"
+      )
+    },
+
+    dependencyOverrides ++= {
+      Seq(
+        "com.google.protobuf" % "protobuf-java" % protoVersion
+      )
+    },
+
+    (assembly / test) := { },
+
+    (assembly / logLevel) := Level.Info,
+
+    // Exclude `scala-library` from assembly.
+    (assembly / assemblyPackageScala / assembleArtifact) := false,
+
+    // Exclude `pmml-model-*.jar`, `scala-collection-compat_*.jar`,`jsr305-*.jar` and
+    // `netty-*.jar` and `unused-1.0.0.jar` from assembly.
+    (assembly / assemblyExcludedJars) := {
+      val cp = (assembly / fullClasspath).value
+      cp filter { v =>
+        val name = v.data.getName
+        name.startsWith("pmml-model-") || name.startsWith("scala-collection-compat_") ||
+          name.startsWith("jsr305-") || name.startsWith("netty-") || name == "unused-1.0.0.jar"
+      }
+    },
+
+    (assembly / assemblyShadeRules) := Seq(
+      ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.connect.protobuf.@1").inAll,
+    ),
+
+    (assembly / assemblyMergeStrategy) := {
+      case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
+      // Drop all proto files that are not needed as artifacts of the build.
+      case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard
+      case _ => MergeStrategy.first
+    }
+  ) ++ {
+    Seq(
+      (Compile / PB.targets) := Seq(
+        PB.gens.java -> (Compile / sourceManaged).value,
+        PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value
+      )
+    )
+  }
+}
+
 object SparkProtobuf {
   import BuildCommons.protoVersion
 
@@ -1331,10 +1389,10 @@ object Unidoc {
     (ScalaUnidoc / unidoc / unidocProjectFilter) :=
       inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes,
-        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, protobuf),
+        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf),
     (JavaUnidoc / unidoc / unidocProjectFilter) :=
       inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes,
-        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, protobuf),
+        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf),
 
     (ScalaUnidoc / unidoc / unidocAllClasspaths) := {
       ignoreClasspaths((ScalaUnidoc / unidoc / unidocAllClasspaths).value)
@@ -1420,6 +1478,7 @@
       // produce the shaded Jar which happens automatically in the case of Maven.
       // Later, when the dependencies are copied, we manually copy the shaded Jar only.
       val fid = (LocalProject("connect") / assembly).value
+      val fidClient = (LocalProject("connect-client-jvm") / assembly).value
       val fidProtobuf = (LocalProject("protobuf") / assembly).value
 
       (Compile / dependencyClasspath).value.map(_.data)
@@ -1433,6 +1492,9 @@
           if (jar.getName.contains("spark-connect") &&
             !SbtPomKeys.profiles.value.contains("noshade-connect")) {
             Files.copy(fid.toPath, destJar.toPath)
+          } else if (jar.getName.contains("connect-client-jvm") &&
+            !SbtPomKeys.profiles.value.contains("noshade-connect-client-jvm")) {
+            Files.copy(fidClient.toPath, destJar.toPath)
           } else if (jar.getName.contains("spark-protobuf") &&
             !SbtPomKeys.profiles.value.contains("noshade-protobuf")) {
             Files.copy(fidProtobuf.toPath, destJar.toPath)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org