This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 1847ebafb98  [SPARK-42656][CONNECT][FOLLOWUP] Spark Connect Shell
1847ebafb98 is described below

commit 1847ebafb986a0babebda39f11f39c666a0ff83d
Author: Zhen Li <zhenli...@users.noreply.github.com>
AuthorDate: Tue Mar 7 08:34:40 2023 -0400

    [SPARK-42656][CONNECT][FOLLOWUP] Spark Connect Shell

    ### What changes were proposed in this pull request?
    Add a spark-connect-shell script that starts the Spark shell with Spark Connect enabled.
    Added the "-Pconnect" profile to build Spark Connect into the distribution.
    Simplified the dev shell scripts using the "-Pconnect" profile.

    ### Why are the changes needed?
    Allows users to try out Spark Connect easily.

    ### Does this PR introduce _any_ user-facing change?
    Yes. Added a new shell script and the "-Pconnect" build option.

    ### How was this patch tested?
    Manually tested.

    Closes #40305 from zhenlineo/connect-shell.

    Authored-by: Zhen Li <zhenli...@users.noreply.github.com>
    Signed-off-by: Herman van Hovell <her...@databricks.com>
    (cherry picked from commit 2e7207f96e1ff848def135de63f63bcda7402517)
    Signed-off-by: Herman van Hovell <her...@databricks.com>
---
 assembly/pom.xml                                        | 10 ++++++++++
 .../bin/spark-connect => bin/spark-connect-shell        | 18 ++++++------------
 connector/connect/bin/spark-connect                     | 13 ++++++-------
 connector/connect/bin/spark-connect-scala-client.sc     |  1 +
 .../connect/bin/{spark-connect => spark-connect-shell}  | 15 +++++++--------
 docs/building-spark.md                                  |  4 ++++
 .../main/scala-2.12/org/apache/spark/repl/Main.scala    |  5 +++++
 7 files changed, 39 insertions(+), 27 deletions(-)
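For a quick try-out, the intended flow, pieced together from the scripts and docs changed below (commands assume a Spark source checkout as the working directory), is roughly:

    # Build Spark with the Connect module included (sbt, as the dev scripts use)
    ./build/sbt -Phive -Pconnect package

    # or the Maven equivalent documented in docs/building-spark.md
    ./build/mvn -Pconnect -DskipTests clean package

    # Then start a spark-shell with Spark Connect enabled
    ./bin/spark-connect-shell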
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a4111eb64d9..b86fee4bceb 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -152,6 +152,16 @@
         </dependency>
       </dependencies>
     </profile>
+    <profile>
+      <id>connect</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
     <profile>
       <id>kubernetes</id>
       <dependencies>
diff --git a/connector/connect/bin/spark-connect b/bin/spark-connect-shell
similarity index 53%
copy from connector/connect/bin/spark-connect
copy to bin/spark-connect-shell
index 2f2ce7df08c..9026c81e70d 100755
--- a/connector/connect/bin/spark-connect
+++ b/bin/spark-connect-shell
@@ -17,17 +17,11 @@
 # limitations under the License.
 #
 
-# Go to the Spark project root directory
-FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
-cd "$FWDIR"
-export SPARK_HOME=$FWDIR
+# A shell script to start a spark-shell with Spark Connect enabled.
 
-SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
-SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi)
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
 
-# Build the jars needed for spark submit and spark connect
-build/sbt "${SCALA_ARG}" -Phive package
-
-CONNECT_JAR=`ls "${SPARK_HOME}"/connector/connect/server/target/scala-"${SCALA_BINARY_VER}"/spark-connect-assembly*.jar | paste -sd ',' -`
-
-exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR"
\ No newline at end of file
+# This requires building Spark with `-Pconnect`, e.g. `build/sbt -Pconnect package`
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
\ No newline at end of file
diff --git a/connector/connect/bin/spark-connect b/connector/connect/bin/spark-connect
index 2f2ce7df08c..62d0d36b441 100755
--- a/connector/connect/bin/spark-connect
+++ b/connector/connect/bin/spark-connect
@@ -17,17 +17,16 @@
 # limitations under the License.
 #
 
+# Start the Spark Connect server with server logs printed to standard output. The script rebuilds
+# the server dependencies and starts the server at the default port. This can be used to debug the
+# client during client development.
+
 # Go to the Spark project root directory
 FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
 cd "$FWDIR"
 export SPARK_HOME=$FWDIR
 
-SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
-SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi)
-
 # Build the jars needed for spark submit and spark connect
-build/sbt "${SCALA_ARG}" -Phive package
-
-CONNECT_JAR=`ls "${SPARK_HOME}"/connector/connect/server/target/scala-"${SCALA_BINARY_VER}"/spark-connect-assembly*.jar | paste -sd ',' -`
+build/sbt -Phive -Pconnect package
 
-exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR"
\ No newline at end of file
+exec "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.connect.SimpleSparkConnectService "$@"
\ No newline at end of file
diff --git a/connector/connect/bin/spark-connect-scala-client.sc b/connector/connect/bin/spark-connect-scala-client.sc
index a8d1856498c..9cb4f92417d 100644
--- a/connector/connect/bin/spark-connect-scala-client.sc
+++ b/connector/connect/bin/spark-connect-scala-client.sc
@@ -22,6 +22,7 @@ val sessionBuilder = SparkSession.builder()
 val spark = if (conStr.isEmpty) sessionBuilder.build() else sessionBuilder.remote(conStr).build()
 import spark.implicits._
 import spark.sql
+println("Spark session available as 'spark'.")
 println(
   """
     | _____ __ ______ __
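Stripped of the build step, the two server-side entry points above boil down to the following (commands lifted from the scripts in this diff; both assume the Connect jars were already built with -Pconnect):

    # spark-connect: run Spark Connect as a standalone service, logs on stdout
    "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.connect.SimpleSparkConnectService

    # spark-connect-shell: run a spark-shell with the Connect plugin loaded
    "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin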
diff --git a/connector/connect/bin/spark-connect b/connector/connect/bin/spark-connect-shell
similarity index 62%
copy from connector/connect/bin/spark-connect
copy to connector/connect/bin/spark-connect-shell
index 2f2ce7df08c..b31ba1bf140 100755
--- a/connector/connect/bin/spark-connect
+++ b/connector/connect/bin/spark-connect-shell
@@ -17,17 +17,16 @@
 # limitations under the License.
 #
 
+# The Spark Connect shell for development. This shell script builds the Spark Connect server with
+# all dependencies and starts the server at the default port.
+# Use `bin/spark-connect-shell` instead if rebuilding the dependency jars is not needed.
+
 # Go to the Spark project root directory
 FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
 cd "$FWDIR"
 export SPARK_HOME=$FWDIR
 
-SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
-SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi)
-
-# Build the jars needed for spark submit and spark connect
-build/sbt "${SCALA_ARG}" -Phive package
-
-CONNECT_JAR=`ls "${SPARK_HOME}"/connector/connect/server/target/scala-"${SCALA_BINARY_VER}"/spark-connect-assembly*.jar | paste -sd ',' -`
+# Build the jars needed for spark shell and spark connect
+build/sbt -Phive -Pconnect package
 
-exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR"
\ No newline at end of file
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
\ No newline at end of file
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 9b115f1ad91..be1c9062c5e 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -119,6 +119,10 @@ For instance, you can build the Spark Streaming module using:
 
 where `spark-streaming_{{site.SCALA_BINARY_VERSION}}` is the `artifactId` as defined in `streaming/pom.xml` file.
 
+## Building with Spark Connect support
+
+    ./build/mvn -Pconnect -DskipTests clean package
+
 ## Continuous Compilation
 
 We use the scala-maven-plugin which supports incremental and continuous compilation. E.g.
diff --git a/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala b/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala
index a68b112ed2b..eaca4ad6ee2 100644
--- a/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala
+++ b/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala
@@ -121,6 +121,11 @@ object Main extends Logging {
       sparkContext = sparkSession.sparkContext
       sparkSession
     } catch {
+      case e: ClassNotFoundException if isShellSession && e.getMessage.contains(
+          "org.apache.spark.sql.connect.SparkConnectPlugin") =>
+        logError("Failed to load spark connect plugin.")
+        logError("You need to build Spark with -Pconnect.")
+        sys.exit(1)
       case e: Exception if isShellSession =>
         logError("Failed to initialize Spark session.", e)
         sys.exit(1)
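With the Main.scala change above, the shell fails fast with a pointer to -Pconnect instead of an opaque stack trace when the plugin class is missing. A rough pre-flight check (a sketch only; the assembly jar directory shown is the conventional sbt build layout and is not part of this diff):

    # Confirm the Connect jar is present before launching the shell
    ls assembly/target/scala-2.12/jars/spark-connect_* \
      || echo "Spark Connect jar missing; rebuild with ./build/sbt -Pconnect package"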