This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b3276ef4e3a [SPARK-41534][CONNECT][SQL] Setup initial client module for Spark Connect
b3276ef4e3a is described below

commit b3276ef4e3a275110c02e27e4b95dc3596b71cf4
Author: vicennial <venkata.gud...@databricks.com>
AuthorDate: Thu Dec 22 14:10:42 2022 -0400

    [SPARK-41534][CONNECT][SQL] Setup initial client module for Spark Connect
    
    ### What changes were proposed in this pull request?
    
    This PR sets up the client module (primarily the SBT and Maven builds) in
    Spark Connect in preparation for the Scala/JVM client. A placeholder
    `SparkConnectClient` is added in this PR.
    
    ### Why are the changes needed?
    
    For the development of the Scala/JVM client for Spark Connect.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Existing build CI + placeholder test.
    
    Closes #39078 from vicennial/clientModule.
    
    Lead-authored-by: vicennial <venkata.gud...@databricks.com>
    Co-authored-by: Venkata Sai Akhil Gudesa <gvs.akhil1...@gmail.com>
    Signed-off-by: Herman van Hovell <her...@databricks.com>
---
 connector/connect/client/jvm/pom.xml               | 94 ++++++++++++++++++++++
 .../sql/connect/client/SparkConnectClient.scala    | 46 +++++++++++
 .../jvm/src/test/resources/log4j2.properties       | 39 +++++++++
 .../connect/client/SparkConnectClientSuite.scala   | 32 ++++++++
 pom.xml                                            |  1 +
 project/SparkBuild.scala                           | 70 +++++++++++++++-
 6 files changed, 278 insertions(+), 4 deletions(-)

diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
new file mode 100644
index 00000000000..39de7725de2
--- /dev/null
+++ b/connector/connect/client/jvm/pom.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.4.0-SNAPSHOT</version>
+    <relativePath>../../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-connect-client-jvm_2.12</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Connect Client</name>
+  <url>https://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>connect-client-jvm</sbt.project.name>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <version>${protobuf.version}</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <!-- Shade all Protobuf dependencies of this build -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <configuration>
+          <shadedArtifactAttached>false</shadedArtifactAttached>
+          <artifactSet>
+            <includes>
+              <include>com.google.protobuf:*</include>
+              <include>org.apache.spark:spark-connect-common_${scala.binary.version}</include>
+            </includes>
+          </artifactSet>
+          <relocations>
+            <relocation>
+              <pattern>com.google.protobuf</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.protobuf</shadedPattern>
+              <includes>
+                <include>com.google.protobuf.**</include>
+              </includes>
+            </relocation>
+          </relocations>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
\ No newline at end of file
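
The shade plugin configuration above relocates all bundled protobuf classes into Spark's shaded namespace, so the published client jar cannot conflict with a different protobuf version on the application classpath. A minimal sketch of the observable effect, assuming Spark's default `spark.shade.packageName` of `org.sparkproject` (illustrative only, not part of this commit):

    // Scala sketch: in the shaded jar, protobuf classes resolve under the
    // relocated package instead of com.google.protobuf.
    val relocated = Class.forName("org.sparkproject.connect.protobuf.Message")
    println(relocated.getName) // org.sparkproject.connect.protobuf.Message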
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
new file mode 100644
index 00000000000..beaae6412be
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connect.client
+
+import org.apache.spark.connect.proto
+
+class SparkConnectClient(private val userContext: proto.UserContext) {
+
+  /**
+   * Placeholder method.
+   * @return User ID.
+   */
+  def userId: String = userContext.getUserId()
+}
+
+object SparkConnectClient {
+  def builder(): Builder = new Builder()
+
+  class Builder() {
+    private val userContextBuilder = proto.UserContext.newBuilder()
+
+    def userId(id: String): Builder = {
+      userContextBuilder.setUserId(id)
+      this
+    }
+
+    def build(): SparkConnectClient = {
+      new SparkConnectClient(userContextBuilder.build())
+    }
+  }
+}
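
The placeholder API above is intentionally small: the builder only carries a `proto.UserContext`. A minimal usage sketch, mirroring the test suite further below:

    import org.apache.spark.sql.connect.client.SparkConnectClient

    // No RPC happens yet; the placeholder just round-trips the UserContext.
    val client = SparkConnectClient.builder().userId("my_user").build()
    assert(client.userId == "my_user")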
diff --git a/connector/connect/client/jvm/src/test/resources/log4j2.properties b/connector/connect/client/jvm/src/test/resources/log4j2.properties
new file mode 100644
index 00000000000..ab02104c696
--- /dev/null
+++ b/connector/connect/client/jvm/src/test/resources/log4j2.properties
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %t: %m%n%ex
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+logger.jetty.name = org.sparkproject.jetty
+logger.jetty.level = warn
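
The root logger resolves its appender through the `${sys:test.appender:-File}` lookup above, defaulting to the file appender. A hedged sketch of how a build could flip a forked test JVM to the console appender (illustrative sbt setting, not part of this commit):

    // sbt build definition: forked test JVMs receive this system property,
    // so log4j2 routes their output to stderr instead of target/unit-tests.log.
    Test / javaOptions += "-Dtest.appender=console"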
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
new file mode 100644
index 00000000000..e0265bb210f
--- /dev/null
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connect.client
+
+import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
+import org.apache.spark.connect.proto
+
+class SparkConnectClientSuite extends AnyFunSuite { // scalastyle:ignore funsuite
+
+  private def createClient = {
+    new SparkConnectClient(proto.UserContext.newBuilder().build())
+  }
+
+  test("Placeholder test: Create SparkConnectClient") {
+    val client = SparkConnectClient.builder().userId("abc123").build()
+    assert(client.userId == "abc123")
+  }
+}
diff --git a/pom.xml b/pom.xml
index 2804d9e560f..2a9ad47066e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,6 +102,7 @@
     <module>connector/avro</module>
     <module>connector/connect/server</module>
     <module>connector/connect/common</module>
+    <module>connector/connect/client/jvm</module>
     <module>connector/protobuf</module>
     <!-- See additional modules enabled by profiles below -->
   </modules>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 556f8528ea1..cf9342b7777 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -54,13 +54,14 @@ object BuildCommons {
 
   val connectCommon = ProjectRef(buildLocation, "connect-common")
   val connect = ProjectRef(buildLocation, "connect")
+  val connectClient = ProjectRef(buildLocation, "connect-client-jvm")
 
   val allProjects@Seq(
    core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags, sketch, kvstore, _*
  ) = Seq(
    "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe",
    "tags", "sketch", "kvstore"
-  ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect)
+  ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect, connectClient)
 
   val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn,
     sparkGangliaLgpl, streamingKinesisAsl,
@@ -402,7 +403,7 @@ object SparkBuild extends PomBuild {
   val mimaProjects = allProjects.filterNot { x =>
     Seq(
      spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn,
-      unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, protobuf
+      unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf
     ).contains(x)
   }
 
@@ -445,6 +446,7 @@ object SparkBuild extends PomBuild {
 
   enable(SparkConnectCommon.settings)(connectCommon)
   enable(SparkConnect.settings)(connect)
+  enable(SparkConnectClient.settings)(connectClient)
 
   /* Protobuf settings */
   enable(SparkProtobuf.settings)(protobuf)
@@ -825,6 +827,62 @@ object SparkConnect {
   }
 }
 
+object SparkConnectClient {
+  import BuildCommons.protoVersion
+
+  lazy val settings = Seq(
+    // For some reason the resolution from the imported Maven build does not work for some
+    // of these dependencies that we need to shade later on.
+    libraryDependencies ++= {
+      Seq(
+        "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf"
+      )
+    },
+
+    dependencyOverrides ++= {
+      Seq(
+        "com.google.protobuf" % "protobuf-java" % protoVersion
+      )
+    },
+
+    (assembly / test) := { },
+
+    (assembly / logLevel) := Level.Info,
+
+    // Exclude `scala-library` from assembly.
+    (assembly / assemblyPackageScala / assembleArtifact) := false,
+
+    // Exclude `pmml-model-*.jar`, `scala-collection-compat_*.jar`, `jsr305-*.jar` and
+    // `netty-*.jar` and `unused-1.0.0.jar` from assembly.
+    (assembly / assemblyExcludedJars) := {
+      val cp = (assembly / fullClasspath).value
+      cp filter { v =>
+        val name = v.data.getName
+        name.startsWith("pmml-model-") || name.startsWith("scala-collection-compat_") ||
+          name.startsWith("jsr305-") || name.startsWith("netty-") || name == "unused-1.0.0.jar"
+      }
+    },
+
+    (assembly / assemblyShadeRules) := Seq(
+      ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.connect.protobuf.@1").inAll,
+    ),
+
+    (assembly / assemblyMergeStrategy) := {
+      case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
+      // Drop all proto files that are not needed as artifacts of the build.
+      case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard
+      case _ => MergeStrategy.first
+    }
+  ) ++ {
+    Seq(
+      (Compile / PB.targets) := Seq(
+        PB.gens.java -> (Compile / sourceManaged).value,
+        PB.gens.plugin("grpc-java") -> (Compile / sourceManaged).value
+      )
+    )
+  }
+}
+
 object SparkProtobuf {
   import BuildCommons.protoVersion
 
@@ -1331,10 +1389,10 @@ object Unidoc {
 
     (ScalaUnidoc / unidoc / unidocProjectFilter) :=
      inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes,
-        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, protobuf),
+        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf),
     (JavaUnidoc / unidoc / unidocProjectFilter) :=
       inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes,
-        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, protobuf),
+        yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf),
 
     (ScalaUnidoc / unidoc / unidocAllClasspaths) := {
       ignoreClasspaths((ScalaUnidoc / unidoc / unidocAllClasspaths).value)
@@ -1420,6 +1478,7 @@ object CopyDependencies {
      // produce the shaded Jar which happens automatically in the case of Maven.
      // Later, when the dependencies are copied, we manually copy the shaded Jar only.
       val fid = (LocalProject("connect") / assembly).value
+      val fidClient = (LocalProject("connect-client-jvm") / assembly).value
       val fidProtobuf = (LocalProject("protobuf") / assembly).value
 
       (Compile / dependencyClasspath).value.map(_.data)
@@ -1433,6 +1492,9 @@ object CopyDependencies {
           if (jar.getName.contains("spark-connect") &&
             !SbtPomKeys.profiles.value.contains("noshade-connect")) {
             Files.copy(fid.toPath, destJar.toPath)
+          } else if (jar.getName.contains("connect-client-jvm") &&
+            !SbtPomKeys.profiles.value.contains("noshade-connect-client-jvm")) {
+            Files.copy(fidClient.toPath, destJar.toPath)
           } else if (jar.getName.contains("spark-protobuf") &&
             !SbtPomKeys.profiles.value.contains("noshade-protobuf")) {
             Files.copy(fidProtobuf.toPath, destJar.toPath)

