This is an automated email from the ASF dual-hosted git repository.
yzheng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push:
new 232678eb9 Change getting-start docker file to use official spark image
from outdated jupyter image (#2943)
232678eb9 is described below
commit 232678eb93b8105fb4beb28238ed745e7387fe30
Author: Yong Zheng <[email protected]>
AuthorDate: Fri Oct 31 19:08:35 2025 -0500
Change getting-start docker file to use official spark image from outdated
jupyter image (#2943)
* Use official spark image
* Use official spark image
* Use official spark image
* Use official spark image
* Use official spark image
---
getting-started/spark/docker-compose.yml | 2 +-
getting-started/spark/notebooks/Dockerfile | 35 ++++++++++++++++------
getting-started/spark/notebooks/SparkPolaris.ipynb | 2 +-
.../v3.5/getting-started/notebooks/Dockerfile | 5 ++--
.../notebook_requirements.txt | 0
5 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/getting-started/spark/docker-compose.yml b/getting-started/spark/docker-compose.yml
index 293c67d30..f6a57795d 100644
--- a/getting-started/spark/docker-compose.yml
+++ b/getting-started/spark/docker-compose.yml
@@ -52,4 +52,4 @@ services:
AWS_REGION: us-west-2
POLARIS_HOST: polaris
volumes:
- - ./notebooks:/home/jovyan/notebooks
+ - ./notebooks:/home/spark/notebooks
diff --git a/getting-started/spark/notebooks/Dockerfile b/getting-started/spark/notebooks/Dockerfile
index def61a489..32ee4067b 100644
--- a/getting-started/spark/notebooks/Dockerfile
+++ b/getting-started/spark/notebooks/Dockerfile
@@ -17,15 +17,32 @@
# under the License.
#
-FROM jupyter/all-spark-notebook:spark-3.5.0
+FROM docker.io/apache/spark:3.5.6-java17
-ENV LANGUAGE='en_US:en'
+ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
+ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
-COPY --chown=jovyan client /home/jovyan/client
-COPY --chown=jovyan regtests/requirements.txt /tmp
-RUN pip install -r /tmp/requirements.txt && \
- cd client/python && poetry lock && \
- python3 -m poetry install && \
- pip install -e .
+USER root
-WORKDIR /home/jovyan/
+RUN apt-get update -y && \
+ apt-get install -y python3-venv && \
+ mkdir -p /home/spark && \
+ chown -R spark /home/spark
+
+USER spark
+
+WORKDIR /home/spark
+
+COPY --chown=spark client /home/spark/client
+COPY --chown=spark regtests/requirements.txt /tmp
+COPY --chown=spark regtests/notebook_requirements.txt /tmp
+
+RUN python3 -m venv /home/spark/venv && \
+ . /home/spark/venv/bin/activate && \
+ pip install -r /tmp/requirements.txt -r /tmp/notebook_requirements.txt && \
+ cd client/python && \
+ poetry lock && \
+ poetry install --all-extras
+
+EXPOSE 8888
+CMD ["/home/spark/venv/bin/jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--NotebookApp.token=''"]
diff --git a/getting-started/spark/notebooks/SparkPolaris.ipynb b/getting-started/spark/notebooks/SparkPolaris.ipynb
index 0e5513797..4eb04a7c5 100644
--- a/getting-started/spark/notebooks/SparkPolaris.ipynb
+++ b/getting-started/spark/notebooks/SparkPolaris.ipynb
@@ -792,7 +792,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.6"
+ "version": "3.10.12"
},
"toc-autonumbering": false,
"toc-showmarkdowntxt": false,
diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
index 9af0b5fd3..f5e052b2a 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
+++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
@@ -34,14 +34,13 @@ USER spark
WORKDIR /home/spark
COPY --chown=spark client /home/spark/client
-COPY --chown=spark plugins/spark/v3.5/getting-started/notebooks/requirements.txt /tmp/notebook_requirements.txt
COPY --chown=spark regtests/requirements.txt /tmp
+COPY --chown=spark regtests/notebook_requirements.txt /tmp
COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs
-SHELL ["/bin/bash", "-c"]
RUN python3 -m venv /home/spark/venv && \
- source /home/spark/venv/bin/activate && \
+ . /home/spark/venv/bin/activate && \
pip install -r /tmp/requirements.txt -r /tmp/notebook_requirements.txt && \
cd client/python && \
poetry lock && \
diff --git a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt b/regtests/notebook_requirements.txt
similarity index 100%
rename from plugins/spark/v3.5/getting-started/notebooks/requirements.txt
rename to regtests/notebook_requirements.txt