This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 3feb057 Uses airflow user for build segment of docker image (#20744)
3feb057 is described below
commit 3feb057f0ee34482c2ec28c4e3bedd2d07586347
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sat Jan 8 20:41:29 2022 +0100
Uses airflow user for build segment of docker image (#20744)
PIP produces a warning when root user is used to run pip install.
This is done for a good reason - because installing packages with PIP
as root clashes with a number of distro-managed python packages.
The warning cannot be disabled even if our use case is legitimate
as has been extensively discussed in
https://github.com/pypa/pip/issues/10556.
However, the advice given by the warning is a bit misleading - it
suggests using virtualenv, but since this is considered a bad practice
for container building and because we need to create virtualenvs
dynamically inside the image, using virtualenv is a bad solution for us.
It's been attempted in #19189 and failed.
Instead we create an airflow user and use PIP_USER="true" which
installs all dependencies in the build segment to the ~/.local folder,
from where we can copy them to the main image.
That gets rid of the warning and at the same time allows us to
keep the best practices of building the images.
---
Dockerfile | 48 ++++++++++++++++++++++++++++++++++--------------
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index 8de5fef..5eaaff4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,6 +40,7 @@ ARG ADDITIONAL_PYTHON_DEPS=""
ARG AIRFLOW_HOME=/opt/airflow
ARG AIRFLOW_UID="50000"
+ARG AIRFLOW_USER_HOME_DIR=/home/airflow
ARG PYTHON_BASE_IMAGE="python:3.6-slim-buster"
@@ -173,6 +174,10 @@ ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
ARG AIRFLOW_SOURCES_FROM="empty"
ARG AIRFLOW_SOURCES_TO="/empty"
+ARG AIRFLOW_HOME
+ARG AIRFLOW_USER_HOME_DIR
+ARG AIRFLOW_UID
+
ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \
AIRFLOW_REPO=${AIRFLOW_REPO} \
@@ -183,26 +188,40 @@ ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
- PATH=${PATH}:/root/.local/bin \
+ PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \
AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
+ AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \
+ AIRFLOW_HOME=${AIRFLOW_HOME} \
+ AIRFLOW_UID=${AIRFLOW_UID} \
AIRFLOW_INSTALL_EDITABLE_FLAG="" \
UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \
# By default PIP installs everything to ~/.local
PIP_USER="true"
-COPY scripts/docker/*.sh /scripts/docker/
+COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh
/scripts/docker/
+
RUN bash ./scripts/docker/install_mysql.sh dev \
&& bash ./scripts/docker/install_mssql.sh
ENV PATH=${PATH}:/opt/mssql-tools/bin
COPY docker-context-files /docker-context-files
+RUN adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone"
--disabled-password \
+ --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home
"${AIRFLOW_USER_HOME_DIR}" && \
+ mkdir -p ${AIRFLOW_HOME} && chown -R "airflow:0"
"${AIRFLOW_USER_HOME_DIR}" ${AIRFLOW_HOME}
+
+USER airflow
+
RUN if [[ -f /docker-context-files/pip.conf ]]; then \
- mkdir -p /root/.config/pip; \
- cp /docker-context-files/pip.conf /root/.config/pip/pip.conf; \
+ mkdir -p ${AIRFLOW_USER_HOME_DIR}/.config/pip; \
+ cp /docker-context-files/pip.conf
"${AIRFLOW_USER_HOME_DIR}/.config/pip/pip.conf"; \
fi
+# Copy all scripts required for installation - changing any of those should
lead to
+# rebuilding from here
+COPY --chown=airflow:0 scripts/docker/* /scripts/docker/
+
ENV AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \
AIRFLOW_VERSION=${AIRFLOW_VERSION} \
@@ -224,7 +243,7 @@ RUN bash /scripts/docker/install_pip_version.sh; \
bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
fi
-COPY ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO}
+COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO}
# Add extra python dependencies
ARG ADDITIONAL_PYTHON_DEPS=""
@@ -263,11 +282,11 @@ RUN if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then \
if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
bash /scripts/docker/install_additional_dependencies.sh; \
fi; \
- find /root/.local/ -name '*.pyc' -print0 | xargs -0 rm -r || true ; \
- find /root/.local/ -type d -name '__pycache__' -print0 | xargs -0 rm -r ||
true ; \
+ find "${AIRFLOW_USER_HOME_DIR}/.local/" -name '*.pyc' -print0 | xargs -0
rm -f || true ; \
+ find "${AIRFLOW_USER_HOME_DIR}/.local/" -type d -name '__pycache__'
-print0 | xargs -0 rm -rf || true ; \
# make sure that all directories and files in .local are also group
accessible
- find /root/.local -executable -print0 | xargs --null chmod g+x; \
- find /root/.local -print0 | xargs --null chmod g+rw
+ find "${AIRFLOW_USER_HOME_DIR}/.local" -executable -print0 | xargs --null
chmod g+x; \
+ find "${AIRFLOW_USER_HOME_DIR}/.local" -print0 | xargs --null chmod g+rw
# In case there is a requirements.txt file in "docker-context-files" it will
be installed
# during the build additionally to whatever has been installed so far. It is
recommended that
@@ -372,7 +391,7 @@ ARG ADDITIONAL_RUNTIME_APT_COMMAND=""
ARG ADDITIONAL_RUNTIME_APT_ENV=""
ARG INSTALL_MYSQL_CLIENT="true"
ARG INSTALL_MSSQL_CLIENT="true"
-ARG AIRFLOW_USER_HOME_DIR=/home/airflow
+ARG AIRFLOW_USER_HOME_DIR
ARG AIRFLOW_HOME
# Having the variable in final image allows to disable providers manager
warnings when
# production image is prepared from sources rather than from package
@@ -431,13 +450,14 @@ RUN chmod a+x /scripts/docker/install_mysql.sh && \
mkdir -pv "${AIRFLOW_HOME}"; \
mkdir -pv "${AIRFLOW_HOME}/dags"; \
mkdir -pv "${AIRFLOW_HOME}/logs"; \
- chown -R "airflow:root" "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}"; \
+ chown -R airflow:0 "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}"; \
find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x && \
find "${AIRFLOW_HOME}" -print0 | xargs --null chmod g+rw
-COPY --chown=airflow:root --from=airflow-build-image /root/.local
"${AIRFLOW_USER_HOME_DIR}/.local"
-COPY --chown=airflow:root scripts/in_container/prod/entrypoint_prod.sh
/entrypoint
-COPY --chown=airflow:root scripts/in_container/prod/clean-logs.sh /clean-logs
+COPY --chown=airflow:0 --from=airflow-build-image \
+ "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local"
+COPY --chown=airflow:0 scripts/in_container/prod/entrypoint_prod.sh /entrypoint
+COPY --chown=airflow:0 scripts/in_container/prod/clean-logs.sh /clean-logs
# Make /etc/passwd root-group-writeable so that user can be dynamically added
by OpenShift
# See https://github.com/apache/airflow/issues/9248