potiuk commented on a change in pull request #4543: [AIRFLOW-3718] [WIP/SPLIT]
Multi-layered version of the docker image
URL: https://github.com/apache/airflow/pull/4543#discussion_r267804610
##########
File path: Dockerfile
##########
@@ -13,46 +13,352 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+#
+# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT.
+#
+# Arguments of the build
+# These are declared before the first FROM: they can be used in FROM lines, but a
+# stage has to re-declare an ARG (without a value) before it can read it.
+ARG PYTHON_BASE_IMAGE="python:3.6-slim"
+ARG APT_DEPS_IMAGE="airflow-apt-deps"
+# Default cache image does not have /cache directory - it's the same as python image
+ARG MASTER_WHEEL_CACHE_IMAGE=${PYTHON_BASE_IMAGE}
+ARG AIRFLOW_VERSION="2.0.0.dev0"
+# Speeds up building the image - cassandra driver without CYTHON saves around 10 minutes
+ARG CASS_DRIVER_NO_CYTHON="1"
+# Build cassandra driver on multiple CPUs
+ARG CASS_DRIVER_BUILD_CONCURRENCY="8"
+# By default PIP install is run without cache to make image smaller
+ARG PIP_CACHE_DIRECTIVE="--no-cache-dir"
+# Additional python deps to install
+ARG ADDITIONAL_PYTHON_DEPS=""
+# Whether to use wheel cache during the build
+ARG USE_WHEEL_CACHE="false"
+# PIP version used to install dependencies
+ARG PIP_VERSION="19.0.1"
+############################################################################################################
+# This is the base image with APT dependencies needed by Airflow. It is based on a python slim image
+# Parameters:
+# PYTHON_BASE_IMAGE - base python image (python:x.y-slim)
+############################################################################################################
+# First stage: starts from the image named by the PYTHON_BASE_IMAGE build argument.
+FROM ${PYTHON_BASE_IMAGE} as airflow-apt-deps
-FROM python:3.6-slim
+# Print RUN commands by default
SHELL ["/bin/bash", "-xc"]
-ENV AIRFLOW_HOME=/usr/local/airflow
-ARG AIRFLOW_DEPS="all"
-ARG PYTHON_DEPS=""
-ARG BUILD_DEPS="freetds-dev libkrb5-dev libssl-dev libffi-dev libpq-dev git"
-ARG APT_DEPS="libsasl2-dev freetds-bin build-essential
default-libmysqlclient-dev apt-utils curl rsync netcat locales"
+# Re-declare the build arguments defined before FROM so they are visible inside
+# this stage, then copy them into ENV so they persist in the resulting image.
+ARG PYTHON_BASE_IMAGE
+ARG AIRFLOW_VERSION
+ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE}
+ENV AIRFLOW_VERSION=$AIRFLOW_VERSION
+
+# Print versions. One RUN instead of three: the output is purely informational,
+# so it should not cost three separate image layers.
+# NOTE(review): PYTHON_VERSION is not set anywhere in the visible part of this
+# file - presumably it is provided by the python base image; confirm.
+RUN echo "Python version: ${PYTHON_VERSION}" \
+    && echo "Base image: ${PYTHON_BASE_IMAGE}" \
+    && echo "Airflow version: ${AIRFLOW_VERSION}"
+
+# Make sure noninteractive debian install is used and language variables are set
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANGUAGE=C.UTF-8 \
+    LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8 \
+    LC_CTYPE=C.UTF-8 \
+    LC_MESSAGES=C.UTF-8
+
+# Increase the value below to force reinstalling of all dependencies
+ENV FORCE_REINSTALL_ALL_DEPENDENCIES=1
+
+# curl and gnupg2 are prerequisites for downloading nodejs in the next step.
+# The apt lists are removed in the same layer so they do not end up in the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        curl \
+        gnupg2 \
+    && apt-get autoremove -yqq --purge \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+
+# Install basic apt dependencies (nodejs comes from the nodesource repository
+# that the downloaded setup script registers).
+# pipefail together with "curl -f" makes the build stop when the setup script
+# cannot be downloaded; without them bash would happily run an empty or
+# error-page script and the failure would go unnoticed.
+RUN set -o pipefail \
+    && curl -fsSL https://deb.nodesource.com/setup_10.x | bash - \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        apt-utils \
+        build-essential \
+        curl \
+        default-libmysqlclient-dev \
+        freetds-bin \
+        freetds-dev \
+        git \
+        libffi-dev \
+        libkrb5-dev \
+        libpq-dev \
+        libsasl2-dev \
+        libssl-dev \
+        locales \
+        netcat \
+        nodejs \
+        rsync \
+        sudo \
+    && apt-get autoremove -yqq --purge \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create the airflow user and allow it to run any command through sudo without
+# a password; the sudoers drop-in is then made read-only (0440).
+RUN adduser airflow \
+    && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
+    && chmod 0440 /etc/sudoers.d/airflow
+
+############################################################################################################
+# This is an image with all APT dependencies needed by CI. It is built on top of the airflow APT image
+# Parameters:
+# airflow-apt-deps - this is the base image for CI deps image.
+############################################################################################################
+# CI stage: builds on top of the airflow-apt-deps stage.
+FROM airflow-apt-deps as airflow-ci-apt-deps
+
+# Run RUN instructions through bash with -x so that the commands are printed.
+SHELL ["/bin/bash", "-xc"]
+
+# NOTE(review): presumably the install location of the openjdk-8-jdk package
+# that this stage installs (amd64 path) - confirm.
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
+
+# Install the APT packages needed by CI.
+# Note missing directories on debian-stretch https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199
+# (the man1/man7 directories are created up front because of that bug).
+RUN mkdir -pv /usr/share/man/man1 /usr/share/man/man7 \
+    && apt-get update \
+    && apt-get install --no-install-recommends -y \
+        dirmngr \
+        gnupg \
+        krb5-user \
+        ldap-utils \
+        less \
+        lsb-release \
+        openjdk-8-jdk \
+        openssh-client \
+        openssh-server \
+        postgresql-client \
+        python-selinux \
+        sasl2-bin \
+        sqlite3 \
+        tmux \
+        unzip \
+        vim \
+        wget \
+    && apt-get autoremove -yqq --purge \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Register the MySQL APT repository and install the MySQL client packages.
+#
+# The repository signing key is fetched from the first key server (tried in
+# random order) that answers, exported into apt's trusted keyring, and the
+# temporary GnuPG home is removed again.
+# Expected gpg output on import:
+#   gpg: key 5072E1F5: public key "MySQL Release Engineering <mysql-build@oss.oracle.com>" imported
+#
+# Every step is chained with "&&". Previously "gpgconf --kill all" was followed
+# directly by "rm -rf ...;", which passed "rm -rf <dir>" as arguments to gpgconf,
+# never removed the temporary directory, and (because of the ";") threw away the
+# exit status of everything before it.
+#
+# The debconf selections pre-seed the answers the mysql-community-server
+# package would otherwise ask for interactively.
+RUN export DEBIAN_FRONTEND=noninteractive \
+    && key='A4A9406876FCBD3C456770C88C718D3B5072E1F5' \
+    && export GNUPGHOME="$(mktemp -d)" \
+    && for keyserver in $(shuf -e \
+            ha.pool.sks-keyservers.net \
+            hkp://p80.pool.sks-keyservers.net:80 \
+            keyserver.ubuntu.com \
+            hkp://keyserver.ubuntu.com:80 \
+            pgp.mit.edu) ; do \
+        gpg --keyserver "${keyserver}" --recv-keys "${key}" && break || true ; \
+    done \
+    && gpg --export "${key}" > /etc/apt/trusted.gpg.d/mysql.gpg \
+    && gpgconf --kill all \
+    && rm -rf "${GNUPGHOME}" \
+    && apt-key list > /dev/null \
+    && echo "deb http://repo.mysql.com/apt/ubuntu/ trusty mysql-5.7" | tee -a /etc/apt/sources.list.d/mysql.list \
+    && apt-get update \
+    && MYSQL_PASS="secret" \
+    && debconf-set-selections <<< "mysql-community-server mysql-community-server/data-dir select ''" \
+    && debconf-set-selections <<< "mysql-community-server mysql-community-server/root-pass password $MYSQL_PASS" \
+    && debconf-set-selections <<< "mysql-community-server mysql-community-server/re-root-pass password $MYSQL_PASS" \
+    && apt-get install --no-install-recommends -y mysql-client libmysqlclient-dev \
+    && apt-get autoremove -yqq --purge \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# HADOOP_DISTRO gets its own ENV instruction: HADOOP_HOME below refers to it, and
+# a variable can only be referenced from a later instruction than the one setting it.
+ENV HADOOP_DISTRO=cdh
+ENV HADOOP_VERSION=2.6.0 \
+    HADOOP_HOME=/tmp/hadoop-${HADOOP_DISTRO} \
+    HIVE_HOME=/tmp/hive
+
+# Create the Hadoop, Hive, minicluster and Hive warehouse directories, then make
+# the Hive home and everything under /user/ world-writable.
+RUN mkdir -pv ${HADOOP_HOME} ${HIVE_HOME} \
+    && mkdir /tmp/minicluster \
+    && mkdir -pv /user/hive/warehouse \
+    && chmod -R 777 ${HIVE_HOME} \
+    && chmod -R 777 /user/
+
+# Install Hadoop: download the CDH tarball into /tmp, unpack it into HADOOP_HOME
+# (dropping the top-level directory of the archive) and delete the tarball in the
+# same layer so it does not stay in the image.
+# --absolute-names is a work around to avoid this issue https://github.com/docker/hub-feedback/issues/727
+RUN cd /tmp \
+    && HADOOP_TARBALL="hadoop-${HADOOP_VERSION}-cdh5.11.0.tar.gz" \
+    && wget -q "https://archive.cloudera.com/cdh5/cdh/5/${HADOOP_TARBALL}" \
+    && tar xzf "${HADOOP_TARBALL}" --absolute-names --strip-components 1 -C ${HADOOP_HOME} \
+    && rm "${HADOOP_TARBALL}"
+
+# Install Hive
+RUN cd /tmp && \
Review comment:
All downloads here are now using curl and they are much better "structured"
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services