This is an automated email from the ASF dual-hosted git repository.

fanng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git


The following commit(s) were added to refs/heads/main by this push:
     new d64b86a  Optimize the Spark Jupyter (#88)
d64b86a is described below

commit d64b86a51fa919dbe25e906958833da9ef231455
Author: roryqi <[email protected]>
AuthorDate: Mon Oct 28 15:07:54 2024 +0800

    Optimize the Spark Jupyter (#88)
    
    I forgot to upgrade the version of the Spark Jupyter jar to 0.6.1
    I optimized the usage of Spark jars.
---
 docker-compose.yaml                                |  2 --
 init/common/init_metalake_catalog.sh               |  2 +-
 init/jupyter/gravitino-spark-trino-example.ipynb   |  2 +-
 .../jupyter-dependency.sh}                         | 25 +++++++++++++---------
 init/spark/spark-defaults.conf                     |  2 ++
 playground.sh                                      |  1 +
 6 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index a4f2964..1decede 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -136,7 +136,6 @@ services:
     volumes:
       - ./init/spark:/tmp/spark
       - ./init/common:/tmp/common
-      - spark_jars:/opt/spark/jars
 
   jupyter:
     image: jupyter/pyspark-notebook:spark-3.4.1
@@ -145,7 +144,6 @@ services:
       - 18888:8888
     volumes:
       - ./init/jupyter:/tmp/gravitino
-      - spark_jars:/opt/spark/jars
     entrypoint: /bin/bash /tmp/gravitino/init.sh
     depends_on:
       hive :
diff --git a/init/common/init_metalake_catalog.sh 
b/init/common/init_metalake_catalog.sh
index 9b2bed4..5da3246 100644
--- a/init/common/init_metalake_catalog.sh
+++ b/init/common/init_metalake_catalog.sh
@@ -35,7 +35,7 @@ if echo "$response" | grep -q "\"code\":0"; then
   true
 else
   # Create Hive catalog for experience Gravitino service
-  response=$(curl -X POST -H "Content-Type: application/json" -d 
'{"name":"catalog_hive","type":"RELATIONAL", "provider":"hive", 
"comment":"comment","properties":{"metastore.uris":"thrift://hive:9083", 
"spark.bypass.spark.sql.hive.metastore.jars":"path", 
"spark.bypass.spark.sql.hive.metastore.jars.path":"file:///opt/spark/jars/*" 
}}' http://gravitino:8090/api/metalakes/metalake_demo/catalogs)
+  response=$(curl -X POST -H "Content-Type: application/json" -d 
'{"name":"catalog_hive","type":"RELATIONAL", "provider":"hive", 
"comment":"comment","properties":{"metastore.uris":"thrift://hive:9083"}}' 
http://gravitino:8090/api/metalakes/metalake_demo/catalogs)
   if echo "$response" | grep -q "\"code\":0"; then
     true # Placeholder, do nothing
   else
diff --git a/init/jupyter/gravitino-spark-trino-example.ipynb 
b/init/jupyter/gravitino-spark-trino-example.ipynb
index eaf14fe..48ea713 100644
--- a/init/jupyter/gravitino-spark-trino-example.ipynb
+++ b/init/jupyter/gravitino-spark-trino-example.ipynb
@@ -21,7 +21,7 @@
     "spark = SparkSession.builder \\\n",
     "    .appName(\"PySpark SQL Example\") \\\n",
     "    .config(\"spark.plugins\", 
\"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
-    "    .config(\"spark.jars\", 
\"/opt/spark/jars/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,/opt/spark/jars/gravitino-spark-connector-runtime-3.4_2.12-0.6.1-incubating.jar\")
 \\\n",
+    "    .config(\"spark.jars\", 
\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,/tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.6.1-incubating.jar\")
 \\\n",
     "    .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
     "    .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n",
     "    .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n",
diff --git a/init/spark/spark-defaults.conf b/init/jupyter/jupyter-dependency.sh
old mode 100644
new mode 100755
similarity index 58%
copy from init/spark/spark-defaults.conf
copy to init/jupyter/jupyter-dependency.sh
index c6c72b4..3ddb748
--- a/init/spark/spark-defaults.conf
+++ b/init/jupyter/jupyter-dependency.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -17,13 +19,16 @@
 # under the License.
 #
 
-spark.plugins org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin
-spark.sql.gravitino.uri http://gravitino:8090
-spark.sql.gravitino.metalake metalake_demo
-spark.sql.gravitino.enableIcebergSupport true 
-spark.sql.extensions 
org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
-spark.sql.catalog.catalog_rest org.apache.iceberg.spark.SparkCatalog
-spark.sql.catalog.catalog_rest.type rest
-spark.sql.catalog.catalog_rest.uri http://gravitino:9001/iceberg/
-spark.locality.wait.node 0
-spark.sql.warehouse.dir hdfs://hive:9000/user/hive/warehouse
+jupyter_dir="$(dirname "${BASH_SOURCE-$0}")"
+jupyter_dir="$(
+  cd "${jupyter_dir}" >/dev/null
+  pwd
+)"
+. "${jupyter_dir}/../common/common.sh"
+
+# Prepare download packages
+if [[ ! -d "${jupyter_dir}/packages" ]]; then
+  mkdir "${jupyter_dir}/packages"
+  find "${jupyter_dir}/../spark/packages/" | grep jar | xargs -I {} ln {} 
"${jupyter_dir}/packages/"
+fi
+
diff --git a/init/spark/spark-defaults.conf b/init/spark/spark-defaults.conf
index c6c72b4..446f865 100644
--- a/init/spark/spark-defaults.conf
+++ b/init/spark/spark-defaults.conf
@@ -27,3 +27,5 @@ spark.sql.catalog.catalog_rest.type rest
 spark.sql.catalog.catalog_rest.uri http://gravitino:9001/iceberg/
 spark.locality.wait.node 0
 spark.sql.warehouse.dir hdfs://hive:9000/user/hive/warehouse
+spark.sql.hive.metastore.jars path
+spark.sql.hive.metastore.jars.path file:///opt/spark/jars/*
diff --git a/playground.sh b/playground.sh
index 2144453..abd4777 100755
--- a/playground.sh
+++ b/playground.sh
@@ -73,6 +73,7 @@ start() {
   echo "Preparing packages..."
   ./init/spark/spark-dependency.sh
   ./init/gravitino/gravitino-dependency.sh
+  ./init/jupyter/jupyter-dependency.sh
 
   logSuffix=$(date +%Y%m%d%H%m%s)
   docker-compose up --detach

Reply via email to