This is an automated email from the ASF dual-hosted git repository.
pdallig pushed a commit to branch branch-0.9
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/branch-0.9 by this push:
new 6bd8c8d [ZEPPELIN-5200] Update dockerfile
6bd8c8d is described below
commit 6bd8c8db05994052bc50148daddbda0f2c861670
Author: Philipp Dallig <[email protected]>
AuthorDate: Mon Jan 25 11:38:18 2021 +0100
[ZEPPELIN-5200] Update dockerfile
This is a complete rewrite of the Zeppelin Dockerfile.
Main benefits:
- Update to Ubuntu 20.04
- Install Python 3 and R with conda, as in our CI system
- Refactoring
* [ ] - Testing
* https://issues.apache.org/jira/browse/ZEPPELIN-5200
* Do the license files need an update? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Philipp Dallig <[email protected]>
Closes #4022 from Reamer/docker_zeppelin and squashes the following commits:
4841ef327 [Philipp Dallig] Update miniconda version
ce7f0dd8e [Philipp Dallig] Add some more python modules
27fe42ce6 [Philipp Dallig] Use python 3.7
50362e2cd [Philipp Dallig] Correct comment
bf7f73ea4 [Philipp Dallig] Update dockerfile
(cherry picked from commit b1adb5a23a9c9e23d8527edbaa260cb70587d81b)
Signed-off-by: Philipp Dallig <[email protected]>
---
scripts/docker/zeppelin/bin/Dockerfile | 121 ++++++---------------
.../docker/zeppelin/bin/env_python_3_with_R.yml | 37 +++++++
2 files changed, 73 insertions(+), 85 deletions(-)
diff --git a/scripts/docker/zeppelin/bin/Dockerfile
b/scripts/docker/zeppelin/bin/Dockerfile
index 7eeebdb..bb611bc 100644
--- a/scripts/docker/zeppelin/bin/Dockerfile
+++ b/scripts/docker/zeppelin/bin/Dockerfile
@@ -13,106 +13,57 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-FROM ubuntu:18.04
-MAINTAINER Apache Software Foundation <[email protected]>
+FROM ubuntu:20.04
+
+LABEL maintainer="Apache Software Foundation <[email protected]>"
ENV Z_VERSION="0.9.0"
ENV LOG_TAG="[ZEPPELIN_${Z_VERSION}]:" \
- Z_HOME="/zeppelin" \
+ Z_HOME="/opt/zeppelin" \
LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8 \
+ JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
ZEPPELIN_ADDR="0.0.0.0"
-RUN echo "$LOG_TAG update and install basic packages" && \
- apt-get -y update && \
- apt-get install -y locales && \
- locale-gen $LANG && \
- apt-get install -y software-properties-common && \
- apt -y autoclean && \
- apt -y dist-upgrade && \
- apt-get install -y build-essential
-
-RUN echo "$LOG_TAG install tini related packages" && \
- apt-get install -y wget curl grep sed dpkg && \
- TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep
-o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \
- curl -L
"https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb"
> tini.deb && \
- dpkg -i tini.deb && \
- rm tini.deb
-
-ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
-RUN echo "$LOG_TAG Install java8" && \
- apt-get -y update && \
- apt-get install -y openjdk-8-jdk && \
- rm -rf /var/lib/apt/lists/*
-
-# should install conda first before numpy, matploylib since pip and python
will be installed by conda
-RUN echo "$LOG_TAG Install miniconda3 related packages" && \
+RUN echo "$LOG_TAG install basic packages" && \
apt-get -y update && \
- apt-get install -y bzip2 ca-certificates \
- libglib2.0-0 libxext6 libsm6 libxrender1 \
- git mercurial subversion && \
- echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
- wget --quiet
https://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O
~/miniconda.sh && \
- /bin/bash ~/miniconda.sh -b -p /opt/conda && \
- rm ~/miniconda.sh
-
-ENV PATH /opt/conda/bin:$PATH
-
-RUN DEBIAN_FRONTEND="noninteractive" apt-get -y install tzdata
+ DEBIAN_FRONTEND=noninteractive apt-get install -y locales language-pack-en
tini openjdk-8-jre-headless wget && \
+ # Cleanup
+ rm -rf /var/lib/apt/lists/* && \
+ apt-get autoclean && \
+ apt-get clean
-RUN echo "$LOG_TAG Install python related packages" && \
- apt-get -y install software-properties-common && \
- apt-add-repository universe && \
- apt-get -y update && \
- apt-get install -y python-dev python-pip && \
- apt-get install -y gfortran && \
- # numerical/algebra packages
- apt-get install -y libblas-dev libatlas-base-dev liblapack-dev && \
- # font, image
- apt-get install -y libpng-dev libfreetype6-dev libxft-dev && \
- # for tkinter
- apt-get install -y python-tk libxml2-dev libxslt-dev zlib1g-dev && \
- hash -r && \
+# Install conda to manage python and R packages
+ARG miniconda_version="py37_4.9.2"
+# Hashes via https://docs.conda.io/en/latest/miniconda_hashes.html
+ARG
miniconda_sha256="79510c6e7bd9e012856e25dcb21b3e093aa4ac8113d9aa7e82a86987eabe1c31"
+# Install python and R packages via conda
+COPY env_python_3_with_R.yml /env_python_3_with_R.yml
+RUN set -ex && \
+ wget -nv
https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh
-O miniconda.sh && \
+ echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \
+ sha256sum --strict -c anaconda.sha256 && \
+ bash miniconda.sh -b -p /opt/conda && \
+ export PATH=/opt/conda/bin:$PATH && \
conda config --set always_yes yes --set changeps1 no && \
- conda update -q conda && \
conda info -a && \
- conda config --add channels conda-forge && \
- pip install -q pycodestyle==2.5.0 && \
- pip install -q numpy==1.17.3 pandas==0.25.0 scipy==1.3.1 grpcio==1.19.0
bkzep==0.6.1 hvplot==0.5.2 protobuf==3.10.0 pandasql==0.7.3 ipython==7.8.0
matplotlib==3.0.3 ipykernel==5.1.2 jupyter_client==5.3.4 bokeh==1.3.4
panel==0.6.0 holoviews==1.12.3 seaborn==0.9.0 plotnine==0.5.1 intake==0.5.3
intake-parquet==0.2.2 altair==3.2.0 pycodestyle==2.5.0 apache_beam==2.15.0
-
-RUN echo "$LOG_TAG Install R related packages" && \
- echo "PATH: $PATH" && \
- ls /opt/conda/bin && \
- apt-key adv --keyserver keyserver.ubuntu.com --recv-keys
E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
- add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu
bionic-cran35/' && \
- apt-get -y update && \
- apt-get -y --allow-unauthenticated install r-base r-base-dev && \
- R -e "install.packages('evaluate', repos = 'https://cloud.r-project.org')"
&& \
- R -e "install.packages('knitr', repos='http://cran.us.r-project.org')" && \
- R -e "install.packages('ggplot2', repos='http://cran.us.r-project.org')"
&& \
- R -e "install.packages('googleVis', repos='http://cran.us.r-project.org')"
&& \
- R -e "install.packages('data.table',
repos='http://cran.us.r-project.org')" && \
- R -e "install.packages('IRkernel', repos =
'https://cloud.r-project.org');IRkernel::installspec()" && \
- R -e "install.packages('shiny', repos = 'https://cloud.r-project.org')"
-
-RUN echo "$LOG_TAG Install R related packages2" && \
- # for devtools, Rcpp
- apt-get -y install libcurl4-openssl-dev libssl-dev && \
- R -e "install.packages('devtools', repos='http://cran.us.r-project.org')"
&& \
- R -e "install.packages('Rcpp', repos='http://cran.us.r-project.org')" && \
- Rscript -e "library('devtools'); library('Rcpp');
install_github('ramnathv/rCharts')"
-
-RUN echo "$LOG_TAG Cleanup" && \
- apt-get autoclean && \
- apt-get clean
+ conda env update -f /env_python_3_with_R.yml --prune && \
+ # Cleanup
+ rm -v miniconda.sh anaconda.sha256 && \
+ # Cleanup based on
https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383
+ find /opt/conda/ -follow -type f -name '*.a' -delete && \
+ find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+ conda clean -ay
+ # Allow to modify conda packages. This allows malicious code to be
injected into other interpreter sessions, therefore it is disabled by default
+ # chmod -R ug+rwX /opt/conda
+ENV PATH /opt/conda/bin:$PATH
RUN echo "$LOG_TAG Download Zeppelin binary" && \
- wget --quiet -O /tmp/zeppelin-${Z_VERSION}-bin-all.tgz
http://archive.apache.org/dist/zeppelin/zeppelin-${Z_VERSION}/zeppelin-${Z_VERSION}-bin-all.tgz
&& \
- tar -zxvf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
- rm -rf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
mkdir -p ${Z_HOME} && \
- mv /zeppelin-${Z_VERSION}-bin-all/* ${Z_HOME}/ && \
+ wget -nv -O /tmp/zeppelin-${Z_VERSION}-bin-all.tgz
https://archive.apache.org/dist/zeppelin/zeppelin-${Z_VERSION}/zeppelin-${Z_VERSION}-bin-all.tgz
&& \
+ tar --strip-components=1 -zxvf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz -C
${Z_HOME} && \
+ rm -f /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
chown -R root:root ${Z_HOME} && \
mkdir -p ${Z_HOME}/logs ${Z_HOME}/run ${Z_HOME}/webapps && \
# Allow process to edit /etc/passwd, to create a user entry for zeppelin
diff --git a/scripts/docker/zeppelin/bin/env_python_3_with_R.yml
b/scripts/docker/zeppelin/bin/env_python_3_with_R.yml
new file mode 100644
index 0000000..fb88458
--- /dev/null
+++ b/scripts/docker/zeppelin/bin/env_python_3_with_R.yml
@@ -0,0 +1,37 @@
+name: base
+channels:
+ - conda-forge
+ - defaults
+dependencies:
+ - pycodestyle
+ - numpy=1
+ - pandas=0.25
+ - scipy=1
+ - grpcio
+ - hvplot
+ - protobuf=3
+ - pandasql=0.7.3
+ - ipython=7
+ - matplotlib=3
+ - ipykernel=5
+ - jupyter_client=5
+ - bokeh=1.3.4
+ - panel
+ - holoviews
+ - pyyaml=3
+ - altair
+ - intake
+ - plotnine
+ - seaborn
+ - pip
+ - pip:
+ - bkzep==0.6.1
+
+ - r-base=3
+ - r-evaluate
+ - r-base64enc
+ - r-knitr
+ - r-ggplot2
+ - r-irkernel
+ - r-shiny
+ - r-googlevis