This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch v1-10-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit af6aed47e13d1f194ead57637449a233065ec452 Author: Jarek Potiuk <[email protected]> AuthorDate: Sun May 10 19:55:40 2020 +0200 Add comments to breeze scripts (#8797) (cherry picked from commit 9bb91ef9912c0c9cda8b7eecd365338053d14aa7) --- breeze | 183 +++++++++++++++-------- scripts/ci/_utils.sh | 212 ++++++++++++++++++++------- scripts/ci/docker-compose/backend-sqlite.yml | 2 +- 3 files changed, 284 insertions(+), 113 deletions(-) diff --git a/breeze b/breeze index 96d6f0e..4ec67c6 100755 --- a/breeze +++ b/breeze @@ -30,38 +30,52 @@ declare -a REMAINING_ARGS # This is where static check options are defined declare -a EXTRA_STATIC_CHECK_OPTIONS - +# Sets up all the default variables for Breeze +# They are needed by all other functions function setup_default_breeze_variables() { - # Whether to actually run docker compose with the command set given + # Default command to run - entering breeze environment COMMAND_TO_RUN="enter_breeze" + # In some cases we also want to run two commands in a row (for example when we restart the environment) SECOND_COMMAND_TO_RUN="" + # Indicates that we are inside Breeze environment export BREEZE=true + # Maximum screen width to print the lines spanning the whole terminal width export MAX_SCREEN_WIDTH=100 + # This is where airflow sources reside export AIRFLOW_SOURCES="${MY_DIR}" # Directory where all CI scripts are located SCRIPTS_CI_DIR="${MY_DIR}/scripts/ci" + # Directory where all the build cache is stored - we keep there status of all the docker images + # As well as hashes of the important files, but also we generate build scripts there that are + # Used to execute the commands for breeze BUILD_CACHE_DIR="${MY_DIR}/.build" + # This folder is mounted to inside the container in /files folder. This is the way how + # We can exchange DAGs, scripts, packages etc with the container environment FILES_DIR="${MY_DIR}/files" + # Temporary dir used well ... 
temporarily TMP_DIR="${MY_DIR}/tmp" + # Create those folders above in case they do not exist mkdir -pv "${BUILD_CACHE_DIR}" mkdir -pv "${TMP_DIR}" mkdir -pv "${FILES_DIR}" - # Note - we do not use __script_init.sh here because it can only be used from within - # the CI directory and we need to override PYTHON_MAJOR_MINOR_VERSION based on what we store - # in the .build directory - - # Beginning of the initialisation here - + # load all the common functions here - those are the functions that are shared between Breeze + # and CI scripts (CI scripts do not use Breeze as execution environment) # shellcheck source=scripts/ci/_utils.sh . "${SCRIPTS_CI_DIR}/_utils.sh" + # We have different versions of images depending on the python version used. We keep up with the + # Latest patch-level changes in Python (this is done automatically during CI builds) so we have + # To only take into account MAJOR and MINOR version of python. This variable keeps the major/mninor + # version of python in X.Y format (3.6, 3.7 etc). export PYTHON_MAJOR_MINOR_VERSION="${PYTHON_MAJOR_MINOR_VERSION:=$(read_from_file PYTHON_MAJOR_MINOR_VERSION)}" + # When we generate documentation for README files, we want to force the width of terminal so that + # No matter who is running the documentation generation gets the same output if [[ ${FORCE_SCREEN_WIDTH:="false"} != "true" ]]; then # Sets width of the screen from terminal SCREEN_WIDTH="$(tput cols)" @@ -75,52 +89,47 @@ function setup_default_breeze_variables() { SCREEN_WIDTH=${MAX_SCREEN_WIDTH} fi - # Name of the script + # Name of the script is kept in this variable CMDNAME="$(basename -- "$0")" # Update short and long options in the breeze-complete script - # This way autocomplete will work automatically with all options + # This way autocomplete will work automatically with all options available # shellcheck source=breeze-complete . 
"${MY_DIR}/breeze-complete" - # Skips mounting local Airflow sources + # By default we mount local Airflow sources MOUNT_LOCAL_SOURCES="true" - # Holds last sub-command used + # Holds last sub-command used - this is used by --help flag to print help for the command entered LAST_SUBCOMMAND="" # Determines if help should be run (set to true by --help flag) RUN_HELP="false" - # Holds chosen command if the -x flag is used. + # Holds chosen command to run in case 'execute-command' command is used. RUN_COMMAND="" - # Holds the test target if the -t flag is used. + # Holds the test target if the 'test-target' command is used. TEST_TARGET="" - # Holds docker compose command if the -d flag is used. + # Holds docker compose command if the `docker-compose` command is used. DOCKER_COMPOSE_COMMAND="" - # If true, the docker images are rebuilt locally. + # If set to true, the docker images are rebuilt locally. By default we assume we do not need to + # rebuild the image but if we find we do, this variable will be set to "true" export NEEDS_DOCKER_BUILD="false" # By default we only pull images if we do not have them locally. - # This can be overridden by -p flag + # This can be overridden by '--force-pull-images' flag export FORCE_PULL_IMAGES="false" - # Runtime is empty initially (might be set to kubernetes in case kubernetes is chosen) + # Runtime is empty by default (might be set to kubernetes in case kubernetes is chosen) RUNTIME="" - # Do not enable Kind Kubernetes cluster by default - export ENABLE_KIND_CLUSTER="false" - - # By default we do not push images. This can be overridden by -u flag. - export PUSH_IMAGES=${PUSH_IMAGES:="false"} - - # Forward credentials to docker + # Forward common host credentials to docker (gcloud, aws etc.). export FORWARD_CREDENTIALS="false" - # Reset DB at entry + # If set to true, the database will be reset at entry. 
Works for Postgres and MySQL export DB_RESET="false" # If it set is set to specified version, then the source version of Airflow @@ -131,18 +140,15 @@ function setup_default_breeze_variables() { # of Airflow is removed and the specified version of Airflow is installed from GitHub export INSTALL_AIRFLOW_REFERENCE=${INSTALL_AIRFLOW_REFERENCE:=""} - # Determine version of the Airflow from version.py - AIRFLOW_VERSION=$(grep version "airflow/version.py" | awk '{print $3}' | sed "s/['+]//g") - export AIRFLOW_VERSION - - # Whether to force build without checking if it is needed + # Determines whether to force build without checking if it is needed + # Can be overridden by '--force-build-images' flag. export FORCE_BUILD_IMAGES=${FORCE_BUILD_IMAGES:="false"} - # Files determining whether ASCII-art/cheat-sheet are suppressed + # If those files are present, the ASCII-art/cheat-sheet are suppressed SUPPRESS_CHEATSHEET_FILE="${MY_DIR}/.suppress_cheatsheet" SUPPRESS_ASCIIART_FILE="${MY_DIR}/.suppress_asciiart" - # Default values for flags + # Default values for the flags used _BREEZE_DEFAULT_BACKEND="sqlite" _BREEZE_DEFAULT_KUBERNETES_MODE="git_mode" @@ -152,10 +158,14 @@ function setup_default_breeze_variables() { STATIC_CHECK_PYTHON_MAJOR_MINOR_VERSION=3.6 } -# End of initialisation here +# Initializes development-friendly virtualenv if you are already in such env. It installs all the necessary +# packages from PyPI and it case of problems it provides useful hints on what prerequisites should be +# installed. It also removes and resets the existing AIRFOW_HOME installation to make sure that you +# have it synchronized with the version of airflow installed. It resets the airflow's sqlite database to +# a clean state. 
You can use this function if your virtualenv is broken, to clean it up function initialize_virtualenv() { - # Check if we are in virtualenv + # Check if we are inside virtualenv set +e echo -e "import sys\nif not hasattr(sys,'base_prefix'):\n sys.exit(1)" | "python${PYTHON_MAJOR_MINOR_VERSION}" RES=$? @@ -218,6 +228,8 @@ function initialize_virtualenv() { fi } + +# Sets up autocomplete for Breeze for both - bash and zsh function setup_autocomplete() { echo "Installing bash/zsh completion for local user" echo "Note that completion for zsh is just limited to flags - without their values" @@ -280,6 +292,8 @@ EOF exit 0 } +# Prints information about the current configuration of Breeze - if you enter breeze interactively +# and you did not suppress cheatsheet or asciiart, it also prints those function print_badge { if [[ ${BACKEND} == "postgres" ]]; then BACKEND_VERSION="${POSTGRES_VERSION}" @@ -426,6 +440,12 @@ EOF fi } +# Prepares command file that can be used to easily run the commands without the need of using Breeze +# The command file generated in cache ./build directory is a standalone script that contains +# All the environment variables and docker-compose configuration to run the command. This is because +# depending on configuration of Breeze we might have different compose files used and different variables +# set. Those are a convenience scripts that you might use to debug command execution although +# In most cases they are used internally by Breeze function prepare_command_file() { local FILE="${1}" local CMD="${2}" @@ -457,12 +477,21 @@ export MYSQL_ENCODING="${MYSQL_ENCODING}" export AIRFLOW_CI_IMAGE="${AIRFLOW_CI_IMAGE}" export AIRFOW_PROD_IMAGE="${AIRFLOW_PROD_IMAGE}" export AIRFLOW_IMAGE="${AIRFLOW_IMAGE}" -export SQLITE_VERSION="${SQLITE_VERSION}" +export SQLITE_URL="${SQLITE_URL}" docker-compose --log-level INFO ${CMD}\$${EXPANSION}" EOF chmod u+x "${FILE}" } + +# Prepare all command files that we are using. 
Depending on the command to execute we use different +# convenience scripts: +# cmd_run_ci - to run CI image command (for example entering Breeze, or running a script in CI image) +# cmd_run_prod - to run PROD image command (for example entering Prod image or running a script there) +# test_run_ci - to run test target in CI image +# dc_ci - to run docker compose command for CI image +# dc_prod - to run docker compose command for PROD image +# function prepare_command_files() { MAIN_CI_DOCKER_COMPOSE_FILE=${SCRIPTS_CI_DIR}/docker-compose/base.yml MAIN_PROD_DOCKER_COMPOSE_FILE=${SCRIPTS_CI_DIR}/docker-compose/base.yml @@ -547,6 +576,8 @@ function prepare_command_files() { '"' "false" "${COMPOSE_PROD_FILE}" "${AIRFLOW_PROD_IMAGE}" } +# Prints detailed help for all commands and flgas. Used to generate documentation added to BREEZE.rst +# automatically. function do_help_all() { echo print_line @@ -568,6 +599,7 @@ function do_help_all() { flags } +# Parses all arguments that can be passed to Breeze command - that includes command to run and flags. function parse_arguments() { set -u if ! PARAMS=$(getopt \ @@ -731,7 +763,7 @@ function parse_arguments() { shift ;; -I|--production-image) export PRODUCTION_IMAGE="true" - export SQLITE_VERSION= + export SQLITE_URL= echo echo "*************** PRODUCTION IMAGE *************************" echo @@ -942,6 +974,9 @@ function parse_arguments() { REMAINING_ARGS+=("$@") } +# Prepares nicely formatted versions of list of allowed and default values defined in Breeze. +# It is used in help command to print the lists in a readable format and fold the lists +# so that they fit the screen width. function prepare_formatted_versions() { INDENT=15 LIST_PREFIX=$(printf "%-${INDENT}s" " ") @@ -970,6 +1005,8 @@ function prepare_formatted_versions() { tr ',' ' ' | fold -w "${WIDTH}" -s | sed "s/ /,/g; s/^/${LIST_PREFIX}/") } +# Prepares usage information for all the commands in Breeze. 
+# Those usage commands are stored in appropriate environment variables. function prepare_usage() { # Note that MacOS uses Bash 3.* and we cannot use associative arrays export USAGE_SHELL="[Default] Enters interactive shell in the container" @@ -1197,6 +1234,7 @@ $(flag_footer) " } +# Gets environment variable value converting the lowercase name of command into variable name function get_variable_from_lowercase_name() { PREFIX="${1}" NAME="${2}" @@ -1205,15 +1243,18 @@ function get_variable_from_lowercase_name() { echo "${!VARIABLE_NAME}" } +# Gets usage information from lowercase command function get_usage() { get_variable_from_lowercase_name USAGE "${1}" } + +# Gets detailed usage information from lowercase command function get_detailed_usage() { get_variable_from_lowercase_name DETAILED_USAGE "${1}" } - +# Prints general usage information function usage() { echo " @@ -1245,6 +1286,7 @@ Help commands: echo } +# Prints detailed usage for command specified function detailed_usage() { SUBCOMMAND=${1} echo " @@ -1257,12 +1299,14 @@ $(get_detailed_usage "${SUBCOMMAND}") " } +# Prints flag footer function flag_footer() { echo " Run '${CMDNAME} flags' to see all applicable flags. 
" } +# Prints flags for different variants of airflow to use function flag_airflow_variants() { echo " -p, --python <PYTHON_MAJOR_MINOR_VERSION> @@ -1273,6 +1317,7 @@ ${FORMATTED_PYTHON_MAJOR_MINOR_VERSIONS} " } +# Prints flags for different backend to use function flag_backend_variants() { echo " -b, --backend <BACKEND> @@ -1295,6 +1340,7 @@ ${FORMATTED_MYSQL_VERSIONS} " } +# Prints production image flgas function flag_production_image() { echo " -I, --production-image @@ -1302,6 +1348,7 @@ function flag_production_image() { " } +# Prints additional breeze action flags function flag_breeze_actions() { echo " -d, --db-reset @@ -1322,6 +1369,7 @@ ${FORMATTED_INTEGRATIONS} " } +# Prints Kubernetes action flags function flag_kubernetes_actions() { echo " Action for the cluster : only one of the --kind-cluster-* flags can be used at a time: @@ -1364,6 +1412,7 @@ ${FORMATTED_KUBERNETES_VERSIONS} " } +# Prints flags that determine what is the source mounting scheme function flag_local_file_mounting() { echo " -l, --skip-mounting-local-sources @@ -1372,6 +1421,7 @@ function flag_local_file_mounting() { " } +# Prints flags that allow to choose different airflow variants function flag_choose_different_airflow_version() { echo " -a, --install-airflow-version <INSTALL_AIRFLOW_VERSION> @@ -1386,6 +1436,7 @@ ${FORMATTED_INSTALL_AIRFLOW_VERSIONS} " } +# Prints flags that allow to set assumed answers to questions function flag_assume_answers_to_questions() { echo " -y, --assume-yes @@ -1399,6 +1450,7 @@ function flag_assume_answers_to_questions() { " } +# Prints flags that are used for credential forwarding function flag_credentials() { echo " -f, --forward-credentials @@ -1407,6 +1459,7 @@ function flag_credentials() { " } +# Prints flags that control verbosity function flag_verbosity() { echo " -v, --verbose @@ -1416,6 +1469,7 @@ function flag_verbosity() { " } +# Prints flags controlling docker build process function flag_build_docker_images() { echo " -F, --force-build-images 
@@ -1447,6 +1501,7 @@ ${FORMATTED_DEFAULT_PROD_EXTRAS} " } +# Prints flags controlling docker push process function flag_push_docker_images() { echo " -u, --push-images @@ -1462,6 +1517,8 @@ function flag_push_docker_images() { " } + +# Prints all flags function flags() { echo " $(print_line) @@ -1518,6 +1575,7 @@ $(flag_verbosity) " } +# Prints header line filling screen width - only when VERBOSE is set function print_header_line() { if [ ${VERBOSE:="false"} == "true" ]; then echo @@ -1527,14 +1585,21 @@ function print_header_line() { } +# Prints separation line filling screen width function print_line { printf '#%.0s' $(seq "${SCREEN_WIDTH}") } +# Prints star line filling screen width function print_star_line { printf '*%.0s' $(seq "${SCREEN_WIDTH}") } +# Reads save environment variables. Some of the variables are stored across session so that once +# you use them you do not have to use it next time. That makes those flags persistent +# An example of it is `--backend` or '--kubernetes-mode' flags. 
Note that PYTHON_MAJOR_MINOR_VERSION is +# not read here - it is read at the `setup_default_breeze_variables` method because it is needed +# t determine the right images to use and set several variables that depend on the Python version function read_saved_environment_variables { export BACKEND="${BACKEND:=$(read_from_file BACKEND)}" export BACKEND=${BACKEND:-${_BREEZE_DEFAULT_BACKEND}} @@ -1553,17 +1618,19 @@ function read_saved_environment_variables { # Here you read DockerHub user/account that you use # You can populate your own images in DockerHub this way and work with the, - # You can override it with "-d" option and it will be stored in .build directory + # You can override it with "--dockerhub-user" option and it will be stored in .build directory export DOCKERHUB_USER="${DOCKERHUB_USER:=$(read_from_file DOCKERHUB_USER)}" export DOCKERHUB_USER="${DOCKERHUB_USER:=${_BREEZE_DEFAULT_DOCKERHUB_USER}}" # Here you read DockerHub repo that you use # You can populate your own images in DockerHub this way and work with them - # You can override it with "-d" option and it will be stored in .build directory + # You can override it with "--dockerhub-repo" option and it will be stored in .build directory export DOCKERHUB_REPO="${DOCKERHUB_REPO:=$(read_from_file DOCKERHUB_REPO)}" export DOCKERHUB_REPO="${DOCKERHUB_REPO:=${_BREEZE_DEFAULT_DOCKERHUB_REPO}}" } +# Checks if variables are correctly set and if they are - saves them so that they can be used across +# sessions. 
function check_and_save_all_params() { check_and_save_allowed_param "PYTHON_MAJOR_MINOR_VERSION" "Python version" "--python" check_and_save_allowed_param "BACKEND" "backend" "--backend" @@ -1572,31 +1639,12 @@ function check_and_save_all_params() { check_and_save_allowed_param "POSTGRES_VERSION" "Postgres version" "--postgres-version" check_and_save_allowed_param "MYSQL_VERSION" "Mysql version" "--mysql-version" - # Can't verify those + # Can't verify those - they can be anything, so let's just save them save_to_file DOCKERHUB_USER save_to_file DOCKERHUB_REPO } -function fix_local_file { - if [[ -d "${MY_DIR}/${1}" ]]; then - rm -rf "${MY_DIR:?}/${1}" - fi - touch "${MY_DIR}/${1}" - -} - -function touch_local_files { - # Those files are mounted into container when run locally - # .bash_history is preserved and you can modify .bash_aliases and .inputrc - # according to your liking - fix_local_file ".bash_history" - fix_local_file ".bash_aliases" - fix_local_file ".inputrc" - # When KinD cluster is created, the folder keeps authentication information - # across sessions - mkdir -pv "${MY_DIR}/.kube" -} - +# Prints cheatsheet if it is not suppressed function print_cheatsheet() { if [[ ! 
-f ${SUPPRESS_CHEATSHEET_FILE} && ${COMMAND_TO_RUN} == "enter_breeze" ]]; then echo @@ -1656,6 +1704,8 @@ function print_cheatsheet() { fi } +# Prints setup instruction in case we find that autocomplete is not set +# also prints how to toggle asciiart/cheatsheet function print_setup_instructions { if [[ ${COMMAND_TO_RUN} == "enter_breeze" ]] ; then # shellcheck disable=SC2034 # Unused variables left for comp_breeze usage @@ -1680,6 +1730,8 @@ function print_setup_instructions { fi } +# Checks that pre-commit is installed and upgrades it if needed +# this is used in case static check command is used function make_sure_precommit_is_installed { echo echo "Making sure pre-commit is installed" @@ -1700,12 +1752,15 @@ function make_sure_precommit_is_installed { export PATH="${PATH}":~/.local/bin } +# Removes CI and PROD images and cleans up the flag that indicates that the image was already built function remove_images { docker rmi "${PYTHON_BASE_IMAGE}" || true docker rmi "${AIRFLOW_CI_IMAGE}" || true + docker rmi "${AIRFLOW_PROD_IMAGE}" || true rm -f "${BUILT_IMAGE_FLAG_FILE}" } +# Runs chosen static checks function run_static_checks { if [[ ${STATIC_CHECK} == "all" ]]; then echo @@ -1720,6 +1775,8 @@ function run_static_checks { fi } +# Runs Build before a comman - it will check and if needed rebuild necessary image, depending on the +# command chosen function run_build_command { case "${COMMAND_TO_RUN}" in run_tests|run_docker_compose|run_in_bash) @@ -1766,6 +1823,8 @@ function run_build_command { esac } +# Runs the actual command - depending on the command chosen it will use the right +# Convenient script and run the right command with it function run_breeze_command { set -u case "${COMMAND_TO_RUN}" in @@ -1875,7 +1934,7 @@ read_saved_environment_variables check_and_save_all_params -touch_local_files +sanitize_mounted_files prepare_command_files diff --git a/scripts/ci/_utils.sh b/scripts/ci/_utils.sh index bc694a3..8ac07ab 100644 --- a/scripts/ci/_utils.sh +++ 
b/scripts/ci/_utils.sh @@ -19,11 +19,14 @@ # Assume all the scripts are sourcing the _utils.sh from the scripts/ci directory # and MY_DIR variable is set to this directory. It can be overridden however +# extra flags passed to the docker run for CI image commands (as Bash array) declare -a EXTRA_DOCKER_FLAGS +# extra flags passed to the docker run for PROD image commands(as Bash array) declare -a EXTRA_DOCKER_PROD_BUILD_FLAGS export EXTRA_DOCKER_FLAGS export EXTRA_DOCKER_PROD_BUILD_FLAGS +# In case "VERBOSE_COMMANDS" is set to "true" set -x is used to enable debugging function check_verbose_setup { if [[ ${VERBOSE_COMMANDS:="false"} == "true" ]]; then set -x @@ -33,6 +36,8 @@ function check_verbose_setup { } +# In case "VERBOSE" is set to "true" (--verbose flag in Breeze) all docker commands run will be +# printed before execution function verbose_docker { if [[ ${VERBOSE:="false"} == "true" ]]; then echo "docker" "${@}" @@ -42,28 +47,36 @@ function verbose_docker { # Common environment that is initialized by both Breeze and CI scripts function initialize_common_environment { - # default python version + # default python Major/Minor version PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:="3.5"} + # Sets to where airflow sources are located AIRFLOW_SOURCES=${AIRFLOW_SOURCES:=$(cd "${MY_DIR}/../../" && pwd)} export AIRFLOW_SOURCES + # Sets to the build cache directory - status of build and convenience scripts are stored there BUILD_CACHE_DIR="${AIRFLOW_SOURCES}/.build" export BUILD_CACHE_DIR + # File to keep the last forced answer. 
This is useful for pre-commits where you need to + # only answer once if the image should be rebuilt or not and your answer is used for + # All the subsequent questions LAST_FORCE_ANSWER_FILE="${BUILD_CACHE_DIR}/last_force_answer.sh" - # Create directories if needed + # Create useful directories if not yet created mkdir -p "${AIRFLOW_SOURCES}/.mypy_cache" mkdir -p "${AIRFLOW_SOURCES}/logs" mkdir -p "${AIRFLOW_SOURCES}/tmp" mkdir -p "${AIRFLOW_SOURCES}/files" mkdir -p "${AIRFLOW_SOURCES}/dist" + # Read common values used across Breeze and CI scripts # shellcheck source=common/_common_values.sh . "${AIRFLOW_SOURCES}/common/_common_values.sh" + # Read image-specific values used across Breeze and CI scripts # shellcheck source=common/_image_variables.sh . "${AIRFLOW_SOURCES}/common/_image_variables.sh" + # Read information about files that are checked if image should be rebuilt # shellcheck source=common/_files_for_rebuild_check.sh . "${AIRFLOW_SOURCES}/common/_files_for_rebuild_check.sh" @@ -79,7 +92,8 @@ function initialize_common_environment { export POSTGRES_VERSION=${POSTGRES_VERSION:="9.6"} export MYSQL_VERSION=${MYSQL_VERSION:="5.6"} - # Do not push images from here by default (push them directly from the build script on Dockerhub) + # Do not push images by default (push them directly from the build script on Dockerhub or when + # --push-images flag is specified export PUSH_IMAGES=${PUSH_IMAGES:="false"} # Disable writing .pyc files - slightly slower imports but not messing around when switching @@ -92,24 +106,28 @@ function initialize_common_environment { # Sets mounting of host volumes to container for static checks # unless MOUNT_HOST_AIRFLOW_VOLUME is not true # - # Note that this cannot be function because we need the EXTRA_DOCKER_FLAGS array variable - # MOUNT_HOST_AIRFLOW_VOLUME=${MOUNT_HOST_AIRFLOW_VOLUME:="true"} export MOUNT_HOST_AIRFLOW_VOLUME # If this variable is set, we mount the whole sources directory to the host rather than - # selected volumes 
+ # selected volumes. This is needed to check ALL source files during licence check + # for example MOUNT_SOURCE_DIR_FOR_STATIC_CHECKS=${MOUNT_SOURCE_DIR_FOR_STATIC_CHECKS="false"} export MOUNT_SOURCE_DIR_FOR_STATIC_CHECKS - # Set host user id to current user + # Set host user id to current user. This is used to set the ownership properly when exiting + # The container on Linux - all files created inside docker are created with root user + # but they should be restored back to the host user HOST_USER_ID="$(id -ur)" export HOST_USER_ID - # Set host group id to current group + # Set host group id to current group This is used to set the ownership properly when exiting + # The container on Linux - all files created inside docker are created with root user + # but they should be restored back to the host user HOST_GROUP_ID="$(id -gr)" export HOST_GROUP_ID + # Add the right volume mount for sources, depending which mount strategy is used if [[ ${MOUNT_SOURCE_DIR_FOR_STATIC_CHECKS} == "true" ]]; then print_info print_info "Mount whole airflow source directory for static checks (make sure all files are in container)" @@ -137,6 +155,8 @@ function initialize_common_environment { ) fi + # In case of the CI build get environment variables from codecov.io and + # set it as the extra docker flags. 
As described in https://docs.codecov.io/docs/testing-with-docker if [[ ${CI:=} == "true" ]]; then CI_CODECOV_ENV="$(bash <(curl -s https://codecov.io/env))" for ENV_PARAM in ${CI_CODECOV_ENV} @@ -147,31 +167,37 @@ function initialize_common_environment { EXTRA_DOCKER_PROD_BUILD_FLAGS=() # We use pulled docker image cache by default to speed up the builds + # but we can also set different docker caching strategy (for example we can use local cache + # to build the images in case we iterate with the image export DOCKER_CACHE=${DOCKER_CACHE:="pulled"} - # By default we are not upgrading to latest requirements - # This will only be done in cron jobs and when run with breeze + # By default we are not upgrading to latest requirements when building Docker CI image + # This will only be done in cron jobs export UPGRADE_TO_LATEST_REQUIREMENTS=${UPGRADE_TO_LATEST_REQUIREMENTS:="false"} + # In case of MacOS we need to use gstat - gnu version of the stats STAT_BIN=stat if [[ "${OSTYPE}" == "darwin"* ]]; then STAT_BIN=gstat fi + # Read airflow version from the version.py AIRFLOW_VERSION=$(grep version "airflow/version.py" | awk '{print $3}' | sed "s/['+]//g") export AIRFLOW_VERSION - # default version for dockerhub images + # default version of python used to tag the "master" and "latest" images in DockerHub export DEFAULT_PYTHON_MAJOR_MINOR_VERSION=3.6 - + # In case we are not in CI - we assume we run locally. 
There are subtle changes if you run + # CI scripts locally - for example requirements are eagerly updated if you do local run + # in generate requirements if [[ ${CI:="false"} == "true" ]]; then export LOCAL_RUN="false" else export LOCAL_RUN="true" fi - # upgrade while generating requirements should only happen in localy run + # eager upgrade while generating requirements should only happen in locally run # pre-commits or in cron job if [[ ${LOCAL_RUN} == "true" ]]; then export UPGRADE_WHILE_GENERATING_REQUIREMENTS="true" @@ -179,27 +205,35 @@ function initialize_common_environment { export UPGRADE_WHILE_GENERATING_REQUIREMENTS=${UPGRADE_WHILE_GENERATING_REQUIREMENTS:="false"} fi + # Default extras used for building CI image export DEFAULT_CI_EXTRAS="devel_ci" + # Default extras used for building Production image. The master of this information is in the Dockerfile DEFAULT_PROD_EXTRAS=$(grep "ARG AIRFLOW_EXTRAS=" "${AIRFLOW_SOURCES}/Dockerfile"| awk 'BEGIN { FS="=" } { print $2 }' | tr -d '"') - # By default we build CI images but when we specify --producton-image we switch to production image + # By default we build CI images but when we specify `--producton-image` we switch to production image export PRODUCTION_IMAGE="false" - export SQLITE_VERSION="sqlite:////root/airflow/airflow.db" + # The SQLlite URL used for sqlite runs + export SQLITE_URL="sqlite:////root/airflow/airflow.db" + # Determines if airflow should be installed from a specified reference in GitHub export INSTALL_AIRFLOW_REFERENCE="" } - +# Prints verbose information in case VERBOSE variable is set function print_info() { if [[ ${VERBOSE:="false"} == "true" ]]; then echo "$@" fi } +# Those are files that are mounted locally when mounting local sources is requested +# By default not the whole airflow sources directory is mounted because there are often +# artifacts created there (for example .egg-info files) that are breaking the capability +# of running different python versions in Breeze. 
So we only mount what is needed by default. LOCAL_MOUNTS=" .bash_aliases /root/ .bash_history /root/ @@ -234,8 +268,9 @@ tests /opt/airflow/ tmp /opt/airflow/ " -# parse docker-compose-local.yaml file to convert volumes entries -# from airflow section to "-v" "volume mapping" series of options +# Converts the local mounts that we defined above to the right set of -v +# volume mappings in docker-compose file. This is needed so that we only +# maintain the volumes in one place (above) function convert_local_mounts_to_docker_params() { echo "${LOCAL_MOUNTS}" |sed '/^$/d' | awk -v AIRFLOW_SOURCES="${AIRFLOW_SOURCES}" \ ' @@ -246,6 +281,9 @@ function convert_local_mounts_to_docker_params() { { print "-v"; print AIRFLOW_SOURCES "/" $1 ":" $2 basename($1) ":cached" }' } +# Fixes a file that is expected to be a file. If - for whatever reason - the local file is not created +# When mounting it to container, docker assumes it is a missing directory and creates it. Such mistakenly +# Created directories should be removed and replaced with files function sanitize_file() { if [[ -d "${1}" ]]; then rm -rf "${1}" @@ -253,14 +291,22 @@ function sanitize_file() { touch "${1}" } + +# Those files are mounted into container when run locally +# .bash_history is preserved and you can modify .bash_aliases and .inputrc +# according to your liking function sanitize_mounted_files() { sanitize_file "${AIRFLOW_SOURCES}/.bash_history" sanitize_file "${AIRFLOW_SOURCES}/.bash_aliases" sanitize_file "${AIRFLOW_SOURCES}/.inputrc" + + # When KinD cluster is created, the folder keeps authentication information + # across sessions + mkdir -p "${MY_DIR}/.kube" >/dev/null 2>&1 } # -# Creates cache directory where we will keep temporary files needed for the build +# Creates cache directory where we will keep temporary files needed for the docker build # # This directory will be automatically deleted when the script is killed or exists (via trap) # Unless SKIP_CACHE_DELETION variable is set. 
You can set this variable and then see @@ -280,6 +326,7 @@ function create_cache_directory() { export OUTPUT_LOG } +# Removes the cache temporary directory function remove_cache_directory() { if [[ -d ${CACHE_TMP_FILE_DIR} ]]; then rm -rf "${CACHE_TMP_FILE_DIR}" @@ -480,20 +527,20 @@ function assert_not_in_container() { fi } +# Removes the "Forced answer" (yes/no/quit) given previously, unles you specifically want to remember it. +# +# This is the default behaviour of all rebuild scripts to ask independently whether you want to +# rebuild the image or not. Sometimes however we want to remember answer previously given. For +# example if you answered "no" to rebuild the image, the assumption is that you do not +# want to rebuild image also for other rebuilds in the same pre-commit execution. +# +# All the pre-commit checks therefore have `export REMEMBER_LAST_ANSWER="true"` set +# So that in case they are run in a sequence of commits they will not rebuild. Similarly if your most +# recent answer was "no" and you run `pre-commit run mypy` (for example) it will also reuse the +# "no" answer given previously. This happens until you run any of the breeze commands or run all +# precommits `pre-commit run` - then the "LAST_FORCE_ANSWER_FILE" will be removed and you will +# be asked again. function forget_last_answer() { - # Removes the "Forced answer" (yes/no/quit) given previously, unles you specifically want to remember it. - # - # This is the default behaviour of all rebuild scripts to ask independently whether you want to - # rebuild the image or not. Sometimes however we want to remember answer previously given. For - # example if you answered "no" to rebuild the image, the assumption is that you do not - # want to rebuild image also for other rebuilds in the same pre-commit execution. - # - # All the pre-commit checks therefore have `export REMEMBER_LAST_ANSWER="true"` set - # So that in case they are run in a sequence of commits they will not rebuild. 
Similarly if your most - # recent answer was "no" and you run `pre-commit run mypy` (for example) it will also reuse the - # "no" answer given previously. This happens until you run any of the breeze commands or run all - # precommits `pre-commit run` - then the "LAST_FORCE_ANSWER_FILE" will be removed and you will - # be asked again. if [[ ${REMEMBER_LAST_ANSWER:="false"} != "true" ]]; then print_info print_info "Forgetting last answer from ${LAST_FORCE_ANSWER_FILE}:" @@ -509,6 +556,11 @@ function forget_last_answer() { fi } +# Confirms if the image should be rebuilt and interactively checks it with the user +# in case it needs to be rebuilt. It only asks the user if it determines that the rebuild +# is needed and that the rebuild is not already forced. It asks the user using available terminals +# So that the script works also from within pre-commit run via git hooks - where stdin is not +# available - it tries to find a usable terminal and asks the user via this terminal. function confirm_image_rebuild() { ACTION="rebuild" if [[ ${FORCE_PULL_IMAGES:=} == "true" ]]; then fi if [[ -f "${LAST_FORCE_ANSWER_FILE}" ]]; then # set variable from last answered response given in the same pre-commit run - so that it can be - # set in one pre-commit check (build) and then used in another (pylint/mypy/flake8 etc). + # answered in the first pre-commit check (build) and then used in another (pylint/mypy/flake8 etc).
# shellcheck disable=SC1090 source "${LAST_FORCE_ANSWER_FILE}" fi @@ -593,7 +645,7 @@ function confirm_image_rebuild() { } # Builds local image manifest -# It contiains .json file - result of docker inspect - decscribing the image +# It contains only one .json file - result of docker inspect - describing the image # We cannot use docker registry APIs as they are available only with authorisation # But this image can be pulled without authentication function build_ci_image_manifest() { @@ -612,8 +664,8 @@ EOF } # -# Retrieve information about layers in the local IMAGE -# it stores list of SHAS of image layers in the file pointed at by TMP_MANIFEST_LOCAL_SHA +# Retrieves information about layers in the local IMAGE +# it stores list of SHAs of image layers in the file pointed at by TMP_MANIFEST_LOCAL_SHA # function get_local_image_info() { TMP_MANIFEST_LOCAL_JSON=$(mktemp) @@ -638,8 +690,8 @@ function get_local_image_info() { } # -# Retrieve information about layers in the remote IMAGE -# it stores list of SHAS of image layers in the file pointed at by TMP_MANIFEST_REMTOE_SHA +# Retrieves information about layers in the remote IMAGE +# it stores list of SHAs of image layers in the file pointed at by TMP_MANIFEST_REMOTE_SHA # This cannot be done easily with existing APIs of Dockerhub because they require additional authentication # even for public images. Therefore instead we are downloading a specially prepared manifest image # which is built together with the main image. This special manifest image is prepared during @@ -669,7 +721,7 @@ function get_remote_image_info() { verbose_docker rm --force "remote-airflow-manifest" >/dev/null 2>&1 } -# The Number is determines the cut-off between local building time and pull + build time. +# The Number determines the cut-off between local building time and pull + build time. # It is a bit experimental and it will have to be kept # updated as we keep on changing layers. 
The cut-off point is at the moment when we do first # pip install "https://github.com/apache/airflow/archive/${AIRFLOW_BRANCH}.tar... @@ -679,7 +731,7 @@ function get_remote_image_info() { # # This command returns the number of layer in docker history where pip uninstall is called. This is the # line that will take a lot of time to run and at this point it's worth to pull the image from repo -# if there are at least NN chaanged layers in your docker file, you should pull the image. +# if there are at least NN changed layers in your docker file, you should pull the image. # # Note that this only matters if you have any of the important files changed since the last build # of your image such as Dockerfile.ci, setup.py etc. @@ -713,6 +765,11 @@ function compare_layers() { fi } +# Only rebuilds CI image if needed. It checks if the docker image build is needed +# because any of the important source files (from common/_files_for_rebuild_check.sh) has +# changed or in any of the edge cases (docker image removed, .build cache removed etc. +# In case rebuild is needed, it determines (by comparing layers in local and remote image) +# Whether pull is needed before rebuild. function rebuild_ci_image_if_needed() { if [[ ${SKIP_CI_IMAGE_CHECK:="false"} == "true" ]]; then echo @@ -782,9 +839,9 @@ function rebuild_ci_image_if_needed() { fi } - # -# Starts the script/ If VERBOSE_COMMANDS variable is set to true, it enables verbose output of commands executed +# Starts the script. +# If VERBOSE_COMMANDS variable is set to true, it enables verbose output of commands executed # Also prints some useful diagnostics information at start of the script if VERBOSE is set to true # function script_start { @@ -811,8 +868,10 @@ function script_start { } # -# -# Disables verbosity in the script +# Trap function executed always at the end of the script. In case of verbose output it also +# Prints the exit code that the script exits with. 
Removes verbosity of commands in case it was run with +# command verbosity and in case the script was not run from Breeze (so via ci scripts) it displays +# total time spent in the script so that we can easily see it. # function script_end { #shellcheck disable=2181 @@ -837,6 +896,7 @@ function script_end { remove_cache_directory } +# Changes directory to local sources function go_to_airflow_sources { print_info pushd "${AIRFLOW_SOURCES}" &>/dev/null || exit 1 @@ -857,6 +917,9 @@ function basic_sanity_checks() { } +# Interactive version of confirming the ci image that is used in pre-commits +# it displays additional information - what the user should do in order to bring the local images +# back to state that pre-commit will be happy with function rebuild_ci_image_if_needed_and_confirmed() { NEEDS_DOCKER_BUILD="false" THE_IMAGE_TYPE="CI" @@ -880,7 +943,7 @@ function rebuild_ci_image_if_needed_and_confirmed() { echo "You have those options:" echo " * Rebuild the images now by answering 'y' (this might take some time!)" echo " * Skip rebuilding the images and hope changes are not big (you will be asked again)" - echo " * Quit and manually rebuild the images using one of the following commmands" + echo " * Quit and manually rebuild the images using one of the following commands" echo " * ./breeze build-image" echo " * ./breeze build-image --force-pull-images" echo @@ -896,7 +959,8 @@ function rebuild_ci_image_if_needed_and_confirmed() { fi } - +# Checks if any of the files match the regexp specified the parameters here should be +# match_files_regexp REGEXP FILE FILE ... function match_files_regexp() { FILE_MATCHES="false" REGEXP=${1} @@ -914,6 +978,11 @@ function match_files_regexp() { export FILE_MATCHES } +# Retrieves CI environment variables needed - depending on the CI system we run it in. 
+# We try to be CI - agnostic and our scripts should run the same way on different CI systems +# (This makes it easy to move between different CI systems) +# This function maps CI-specific variables into a generic ones (prefixed with CI_) that +# we used in other scripts function get_ci_environment() { export CI_EVENT_TYPE="manual" export CI_TARGET_REPO="apache/airflow" @@ -966,7 +1035,9 @@ function get_ci_environment() { echo } - +# Builds the CI image in the CI environment. +# Depending on the type of build (push/pr/scheduled) it will either build it incrementally or +# from the scratch without cache (the latter for scheduled builds only) function build_ci_image_on_ci() { get_ci_environment @@ -989,20 +1060,27 @@ function build_ci_image_on_ci() { rebuild_ci_image_if_needed - # Disable force pulling forced above + # Disable force pulling forced above this is needed for the subsequent scripts so that + # They do not try to pull/build images again unset FORCE_PULL_IMAGES unset FORCE_BUILD } +# Reads environment variable passed as first parameter from the .build cache file function read_from_file { cat "${BUILD_CACHE_DIR}/.$1" 2>/dev/null || true } +# Saves environment variable passed as first parameter to the .build cache file function save_to_file { # shellcheck disable=SC2005 echo "$(eval echo "\$$1")" > "${BUILD_CACHE_DIR}/.$1" } +# check if parameter set for the variable is allowed (should be on the _BREEZE_ALLOWED list) +# and if it is, it saves it to .build cache file. 
In case the parameter is wrong, the +# saved variable is removed (so that bad value is not used again in case it comes from there) +# and exits with an error function check_and_save_allowed_param { _VARIABLE_NAME="${1}" _VARIABLE_DESCRIPTIVE_NAME="${2}" @@ -1029,6 +1107,7 @@ function check_and_save_allowed_param { save_to_file "${_VARIABLE_NAME}" } +# Docker command to build documentation function run_docs() { verbose_docker run "${EXTRA_DOCKER_FLAGS[@]}" -t \ --entrypoint "/usr/local/bin/dumb-init" \ @@ -1043,6 +1122,7 @@ function run_docs() { | tee -a "${OUTPUT_LOG}" } +# Pulls image in case it is needed (either has never been pulled or pulling was forced # Should be run with set +e # Parameters: # $1 -> image to pull @@ -1066,6 +1146,9 @@ function pull_image_if_needed() { fi } +# Pulls image if needed but tries to pull it from cache (for example GitHub registry) before +# It attempts to pull it from the main repository. This is used to speed up the builds +# In GitHub Actions. # Parameters: # $1 -> image to pull # $2 -> cache image to pull first @@ -1089,6 +1172,7 @@ function pull_image_possibly_from_cache() { set -e } +# Pulls CI image in case caching strategy is "pulled" and the image needs to be pulled function pull_ci_image_if_needed() { # Whether to force pull images to populate cache export FORCE_PULL_IMAGES=${FORCE_PULL_IMAGES:="false"} @@ -1111,6 +1195,7 @@ Docker pulling ${PYTHON_BASE_IMAGE}. } +# Pulls PROD image in case caching strategy is "pulled" and the image needs to be pulled function pull_prod_images_if_needed() { # Whether to force pull images to populate cache export FORCE_PULL_IMAGES=${FORCE_PULL_IMAGES:="false"} @@ -1130,12 +1215,16 @@ function pull_prod_images_if_needed() { fi } +# Prints summary of the build parameters function print_build_info() { print_info print_info "Airflow ${AIRFLOW_VERSION} Python: ${PYTHON_MAJOR_MINOR_VERSION}. 
Image description: ${IMAGE_DESCRIPTION}" print_info } +# Function to spin ASCII spinner during pull and build in pre-commits to give the user indication that +# Pull/Build is happening. It only spins if the output log changes, so if pull/build is stalled +# The spinner will not move. function spin() { local FILE_TO_MONITOR=${1} local SPIN=("-" "\\" "|" "/") @@ -1169,6 +1258,10 @@ Build log: ${FILE_TO_MONITOR} done } +# Builds CI image - depending on the caching strategy (pulled, local, no-cache) it +# passes the necessary docker build flags via DOCKER_CACHE_CI_DIRECTIVE array +# it also passes the right Build args depending on the configuration of the build +# selected by Breeze flags or environment variables. function build_ci_image() { print_build_info if [[ -n ${DETECTED_TERMINAL:=""} ]]; then @@ -1224,6 +1317,11 @@ Docker building ${AIRFLOW_CI_IMAGE}. fi } +# Builds PROD image - depending on the caching strategy (pulled, local, no-cache) it +# passes the necessary docker build flags via DOCKER_CACHE_PROD_DIRECTIVE and +# DOCKER_CACHE_PROD_BUILD_DIRECTIVE (separate caching options are needed for "build" segment of the image) +# it also passes the right Build args depending on the configuration of the build +# selected by Breeze flags or environment variables. function build_prod_image() { print_build_info pull_prod_images_if_needed @@ -1275,7 +1373,7 @@ function build_prod_image() { fi } - +# Removes airflow CI and base images function remove_all_images() { echo "${AIRFLOW_SOURCES}/confirm" "Removing all local images ." 
@@ -1312,6 +1410,7 @@ function filterout_deleted_files { xargs -0 "$STAT_BIN" --printf '%n\0' 2>/dev/null || true; } +# Fixes permissions for groups for all the files that are quickly filtered using the filterout_deleted_files function fix_group_permissions() { if [[ ${PERMISSIONS_FIXED:=} == "true" ]]; then echo @@ -1332,6 +1431,8 @@ function fix_group_permissions() { export PERMISSIONS_FIXED="true" } +# Prepares all variables needed by the CI build. Depending on the configuration used (python version, +# DockerHub user etc.) the variables are set so that other functions can use those variables. function prepare_ci_build() { export AIRFLOW_CI_BASE_TAG="${DEFAULT_BRANCH}-python${PYTHON_MAJOR_MINOR_VERSION}-ci" export AIRFLOW_CI_LOCAL_MANIFEST_IMAGE="local/${DOCKERHUB_REPO}:${AIRFLOW_CI_BASE_TAG}-manifest" @@ -1365,6 +1466,9 @@ function prepare_ci_build() { fix_group_permissions } +# For remote installation of airflow (from GitHub or Pypi) when building the image, you need to +# pass build flags depending on the version and method of the installation (for example to +# get proper requirement constraint files) function add_build_args_for_remote_install() { # entrypoint is used as AIRFLOW_SOURCES_FROM/TO in order to avoid costly copying of all sources of # Airflow - those are not needed for remote install at all. Entrypoint is later overwritten by @@ -1399,6 +1503,8 @@ function add_build_args_for_remote_install() { fi } +# Prepares all variables needed by the PROD build. Depending on the configuration used (python version, +# DockerHub user etc.) the variables are set so that other functions can use those variables. function prepare_prod_build() { export AIRFLOW_PROD_BASE_TAG="${DEFAULT_BRANCH}-python${PYTHON_MAJOR_MINOR_VERSION}" export AIRFLOW_PROD_BUILD_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_PROD_BASE_TAG}-build" @@ -1449,6 +1555,8 @@ function prepare_prod_build() { go_to_airflow_sources } +# Pushes CI image and its manifest to the registry.
In case the image was taken from cache registry +# it is also pushed to the cache, not to the main registry. Manifest is only pushed to the main registry function push_ci_image() { if [[ ${CACHED_AIRFLOW_CI_IMAGE:=} != "" ]]; then verbose_docker tag "${AIRFLOW_CI_IMAGE}" "${CACHED_AIRFLOW_CI_IMAGE}" @@ -1467,6 +1575,8 @@ function push_ci_image() { fi } +# Pushes PROD image to the registry. In case the image was taken from cache registry +# it is also pushed to the cache, not to the main registry function push_prod_images() { if [[ ${CACHED_AIRFLOW_PROD_IMAGE:=} != "" ]]; then verbose_docker tag "${AIRFLOW_PROD_IMAGE}" "${CACHED_AIRFLOW_PROD_IMAGE}" @@ -1487,6 +1597,7 @@ function push_prod_images() { fi } +# Docker command to generate constraint requirement files. function run_generate_requirements() { docker run "${EXTRA_DOCKER_FLAGS[@]}" \ --entrypoint "/usr/local/bin/dumb-init" \ @@ -1516,7 +1627,8 @@ function set_mysql_encoding() { fi } - +# Retrieves version of airflow stored in the production image (used to display the actual +# version we use if it was built from PyPI or GitHub) function get_airflow_version_from_production_image() { docker run --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c 'echo "${AIRFLOW_VERSION}"' } diff --git a/scripts/ci/docker-compose/backend-sqlite.yml b/scripts/ci/docker-compose/backend-sqlite.yml index 5bc8a7e..50972b6 100644 --- a/scripts/ci/docker-compose/backend-sqlite.yml +++ b/scripts/ci/docker-compose/backend-sqlite.yml @@ -20,5 +20,5 @@ services: airflow: environment: - BACKEND=sqlite - - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${SQLITE_VERSION} + - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${SQLITE_URL} - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
