This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch v2-8-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit b3996f92520e5f0a1a2027ad25cc3a0eee6e6f2a Author: Jarek Potiuk <[email protected]> AuthorDate: Tue Nov 28 14:02:52 2023 +0100 Make rootless-docker documented first-class-citizen in Breeze (#35917) Since rootless docker is becoming more and more popular and soon likely becomes standard, we should recognise it in our decisions when it comes to running Breeze as it changes some of the basic assumptions about files and ownership we had when we designed how we are running containers in Breeze. Reviewed the places where "rootless docker" had an impact and fixed a typo in fix-ownership (it did not matter because fix-ownership was not even executed in linux in the first place, but it should be fixed for consistency). Documented the rootless docker in the ADRs for breeze as this is building on top of the ADR 6 and ADR 14 where the root user and ownership fixing decisions were made. (cherry picked from commit 6bbe63bb37df1551959e399d48e3ea8f0f672119) --- .../doc/adr/0015-handling-rootless-docker.md | 67 ++++++++++++++++++++++ dev/breeze/src/airflow_breeze/utils/run_utils.py | 24 ++++++++ scripts/in_container/configure_environment.sh | 4 +- scripts/in_container/run_fix_ownership.py | 2 +- scripts/in_container/run_prepare_er_diagram.py | 3 +- 5 files changed, 97 insertions(+), 3 deletions(-) diff --git a/dev/breeze/doc/adr/0015-handling-rootless-docker.md b/dev/breeze/doc/adr/0015-handling-rootless-docker.md new file mode 100644 index 0000000000..65695a9518 --- /dev/null +++ b/dev/breeze/doc/adr/0015-handling-rootless-docker.md @@ -0,0 +1,67 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + --> + +<!-- START doctoc generated TOC please keep comment here to allow auto update --> +<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE --> +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [15. Handling rootless docker](#15-handling-rootless-docker) + - [Status](#status) + - [Context](#context) + - [Decision](#decision) + - [Consequences](#consequences) + +<!-- END doctoc generated TOC please keep comment here to allow auto update --> + +# 15. Handling rootless docker + +Date: 2023-11-29 + +## Status + +Accepted + +Builds on [6. Using root user and fixing ownership for-ci-container](0006-using-root-user-and-fixing-ownership-for-ci-container.md) +Builds on [14. Fix root ownership after exiting docker command](0014-fix-root-ownership-after-exiting-docker-command.md) + +## Context + +[Rootless docker](https://docs.docker.com/engine/security/rootless/) solutions are becoming more and +more popular. They are more secure to run and they allow running docker containers without root privileges. +It is expected that in the near future, rootless docker will become the default way of running docker. + +In case of rootless docker, the assumptions from both 6. and 14. ADRs are not valid. The user running +docker is re-mapped from the original user in the host that runs the container. 
This means that the +ownership of files created in the container does not have to be fixed (the user ids will be re-mapped +from the container back to the host automatically) and that the dag folder owned by the user on the +host will be automatically owned by the mapped user inside the container. + +This means that we need neither to fix the ownership nor to change the ownership when the docker +is in rootless mode. + +## Decision + +When we enter the breeze container we check if docker is running in rootless mode, and we +have a `DOCKER_IS_ROOTLESS` variable set to `true` when entering the container. This variable might +then be used to make decisions on changing ownership of the files inside the container. + +## Consequences + +Users running Breeze on Linux will have fewer problems with root-owned files and we can also remove +the dedicated `ci fix-ownership` command in CI. diff --git a/dev/breeze/src/airflow_breeze/utils/run_utils.py b/dev/breeze/src/airflow_breeze/utils/run_utils.py index 0707cdad11..874ae954d0 100644 --- a/dev/breeze/src/airflow_breeze/utils/run_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/run_utils.py @@ -33,6 +33,8 @@ from typing import Mapping, Union from rich.markup import escape +from airflow_breeze.branch_defaults import AIRFLOW_BRANCH +from airflow_breeze.global_constants import APACHE_AIRFLOW_GITHUB_REPOSITORY from airflow_breeze.utils.ci_group import ci_group from airflow_breeze.utils.console import Output, get_console from airflow_breeze.utils.path_utils import ( @@ -380,6 +382,28 @@ def check_if_image_exists(image: str) -> bool: return cmd_result.returncode == 0 +def get_ci_image_for_pre_commits() -> str: + github_repository = os.environ.get("GITHUB_REPOSITORY", APACHE_AIRFLOW_GITHUB_REPOSITORY) + python_version = "3.8" + airflow_image = f"ghcr.io/{github_repository}/{AIRFLOW_BRANCH}/ci/python{python_version}" + skip_image_pre_commits = os.environ.get("SKIP_IMAGE_PRE_COMMITS", "false") + if skip_image_pre_commits[0].lower() == "t": 
+ get_console().print( + f"[info]Skipping image check as SKIP_IMAGE_PRE_COMMITS is set to {skip_image_pre_commits}[/]" + ) + sys.exit(0) + if not check_if_image_exists( + image=airflow_image, + ): + get_console().print(f"[red]The image {airflow_image} is not available.[/]\n") + get_console().print( + f"\n[yellow]Please run this to fix it:[/]\n\n" + f"breeze ci-image build --python {python_version}\n\n" + ) + sys.exit(1) + return airflow_image + + def _run_compile_internally(command_to_execute: list[str], dev: bool) -> RunCommandResult: from filelock import SoftFileLock, Timeout diff --git a/scripts/in_container/configure_environment.sh b/scripts/in_container/configure_environment.sh index 6d0cbae41e..ea314a5196 100644 --- a/scripts/in_container/configure_environment.sh +++ b/scripts/in_container/configure_environment.sh @@ -24,7 +24,9 @@ readonly TMUX_CONF_FILE=".tmux.conf" if [[ -d "${FILES_DIR}" ]]; then export AIRFLOW__CORE__DAGS_FOLDER="/files/dags" mkdir -pv "${AIRFLOW__CORE__DAGS_FOLDER}" - sudo chown "${HOST_USER_ID}":"${HOST_GROUP_ID}" "${AIRFLOW__CORE__DAGS_FOLDER}" + if [[ ${HOST_OS} == "linux" && ${DOCKER_IS_ROOTLESS} != "true" ]]; then + sudo chown "${HOST_USER_ID}":"${HOST_GROUP_ID}" "${AIRFLOW__CORE__DAGS_FOLDER}" || true + fi else export AIRFLOW__CORE__DAGS_FOLDER="${AIRFLOW_HOME}/dags" fi diff --git a/scripts/in_container/run_fix_ownership.py b/scripts/in_container/run_fix_ownership.py index 68fc3a9904..d2b7809988 100755 --- a/scripts/in_container/run_fix_ownership.py +++ b/scripts/in_container/run_fix_ownership.py @@ -74,7 +74,7 @@ if __name__ == "__main__": if HOST_OS == "": print("ERROR: HOST_OS environment variable is not set") sys.exit(1) - if HOST_OS == "Linux": + if HOST_OS != "linux": print("Since host OS is not Linux, we don't need to fix ownership.") sys.exit(0) if DOCKER_IS_ROOTLESS: diff --git a/scripts/in_container/run_prepare_er_diagram.py b/scripts/in_container/run_prepare_er_diagram.py index 53e297394b..282cc03324 100755 --- 
a/scripts/in_container/run_prepare_er_diagram.py +++ b/scripts/in_container/run_prepare_er_diagram.py @@ -54,7 +54,8 @@ if __name__ == "__main__": ) HASH_FILE.write_text(sha256hash) host_os = os.environ.get("HOST_OS") - if host_os and host_os.lower() == "linux": + docker_is_rootless = os.environ.get("DOCKER_IS_ROOTLESS", "false") == "true" + if host_os and host_os.lower() == "linux" and not docker_is_rootless: try: host_uid = int(os.environ["HOST_USER_ID"]) host_gid = int(os.environ["HOST_GROUP_ID"])
