This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch rewrite-generate-constraints-in-python in repository https://gitbox.apache.org/repos/asf/airflow.git
commit c8f3e7d32874a70fac50f3fd612436910c161585 Author: Jarek Potiuk <[email protected]> AuthorDate: Sun Dec 10 16:07:06 2023 +0100 Rewrite constraints generation in Python Historically, constraints generation (like most in-container scripts) was written in Bash. Since then we have rewritten all of the scripts that had some logic in Python - generating constraints was the last one to rewrite (all the other scripts are simple and it probably makes sense to keep them in Bash). The scripts now have somewhat improved diagnostics and output. --- .../commands/release_management_commands.py | 6 +- .../airflow_breeze/utils/docker_command_utils.py | 7 +- ...put_release-management_generate-constraints.svg | 6 +- scripts/in_container/_in_container_script_init.sh | 2 +- scripts/in_container/_in_container_utils.sh | 92 +---- scripts/in_container/in_container_utils.py | 50 +++ .../in_container/install_airflow_and_providers.py | 33 +- scripts/in_container/run_generate_constraints.py | 459 +++++++++++++++++++++ scripts/in_container/run_generate_constraints.sh | 157 ------- scripts/in_container/run_system_tests.sh | 4 +- 10 files changed, 524 insertions(+), 292 deletions(-) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 92c0444364..5af44bcc21 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -610,9 +610,9 @@ def run_generate_constraints( result = execute_command_in_shell( shell_params, project_name=f"constraints-{shell_params.python.replace('.', '-')}", - command="/opt/airflow/scripts/in_container/run_generate_constraints.sh", + command="/opt/airflow/scripts/in_container/run_generate_constraints.py", + output=output, ) - fix_ownership_using_docker() return ( result.returncode, f"Constraints {shell_params.airflow_constraints_mode}:{shell_params.python}", @@ -747,6 +747,7 
@@ def generate_constraints( shell_params_list=shell_params_list, skip_cleanup=skip_cleanup, ) + fix_ownership_using_docker() else: shell_params = ShellParams( airflow_constraints_mode=airflow_constraints_mode, @@ -759,6 +760,7 @@ def generate_constraints( shell_params=shell_params, output=None, ) + fix_ownership_using_docker() if return_code != 0: get_console().print(f"[error]There was an error when generating constraints: {info}[/]") sys.exit(return_code) diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index c3c5b3ef81..2ad25acb7c 100644 --- a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -667,7 +667,7 @@ def bring_compose_project_down(preserve_volumes: bool, shell_params: ShellParams def execute_command_in_shell( - shell_params: ShellParams, project_name: str, command: str | None = None + shell_params: ShellParams, project_name: str, command: str | None = None, output: Output | None = None ) -> RunCommandResult: """Executes command in shell. 
@@ -712,10 +712,10 @@ def execute_command_in_shell( shell_params.extra_args = (command,) if get_verbose(): get_console().print(f"[info]Command to execute: '{command}'[/]") - return enter_shell(shell_params) + return enter_shell(shell_params, output=output) -def enter_shell(shell_params: ShellParams) -> RunCommandResult: +def enter_shell(shell_params: ShellParams, output: Output | None = None) -> RunCommandResult: """ Executes entering shell using the parameters passed as kwargs: @@ -804,6 +804,7 @@ def enter_shell(shell_params: ShellParams) -> RunCommandResult: text=True, check=False, env=shell_params.env_variables_for_docker_commands, + output=output, output_outside_the_group=True, ) if command_result.returncode == 0: diff --git a/images/breeze/output_release-management_generate-constraints.svg b/images/breeze/output_release-management_generate-constraints.svg index b8bc2f4b31..82651b9a58 100644 --- a/images/breeze/output_release-management_generate-constraints.svg +++ b/images/breeze/output_release-management_generate-constraints.svg @@ -177,9 +177,9 @@ </text><text class="breeze-release-management-generate-constraints-r5" x="0" y="312.8" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-12)">│</text><text class="breeze-release-management-generate-constraints-r4" x="24.4" y="312.8" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-12)">-</text><text class="breeze-release-management-generate-constraints-r4" x="36.6" y="312.8" textLength="97.6" clip-path="url(#breeze- [...] 
</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="337.2" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-13)">│</text><text class="breeze-release-management-generate-constraints-r6" x="414.8" y="337.2" textLength="866.2" clip-path="url(#breeze-release-management-generate-constraints-line-13)">(constraints-source-providers | constraints | constraints-no-providers)</text><text class="breeze-release-mana [...] </text><text class="breeze-release-management-generate-constraints-r5" x="0" y="361.6" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-14)">│</text><text class="breeze-release-management-generate-constraints-r5" x="414.8" y="361.6" textLength="866.2" clip-path="url(#breeze-release-management-generate-constraints-line-14)">[default: constraints-source-providers]              &# [...] -</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="386" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-15)">│</text><text class="breeze-release-management-generate-constraints-r4" x="24.4" y="386" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-15)">-</text><text class="breeze-release-management-generate-constraints-r4" x="36.6" y="386" textLength="97.6" clip-path="url(#breeze-releas [...] -</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="410.4" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-16)">│</text><text class="breeze-release-management-generate-constraints-r1" x="414.8" y="410.4" textLength="1024.8" clip-path="url(#breeze-release-management-generate-constraints-line-16)">current_version and should be installed in CI from locally built packages  [...] 
-</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="434.8" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-17)">│</text><text class="breeze-release-management-generate-constraints-r1" x="414.8" y="434.8" textLength="1024.8" clip-path="url(#breeze-release-management-generate-constraints-line-17)">current_version.dev0                  & [...] +</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="386" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-15)">│</text><text class="breeze-release-management-generate-constraints-r4" x="24.4" y="386" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-15)">-</text><text class="breeze-release-management-generate-constraints-r4" x="36.6" y="386" textLength="97.6" clip-path="url(#breeze-releas [...] +</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="410.4" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-16)">│</text><text class="breeze-release-management-generate-constraints-r1" x="414.8" y="410.4" textLength="1024.8" clip-path="url(#breeze-release-management-generate-constraints-line-16)">airflow_version >= current_version and should be installed in CI from locall [...] +</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="434.8" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-17)">│</text><text class="breeze-release-management-generate-constraints-r1" x="414.8" y="434.8" textLength="1024.8" clip-path="url(#breeze-release-management-generate-constraints-line-17)">packages with >= current_version.dev0            &# [...] 
</text><text class="breeze-release-management-generate-constraints-r5" x="0" y="459.2" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-18)">│</text><text class="breeze-release-management-generate-constraints-r6" x="414.8" y="459.2" textLength="1024.8" clip-path="url(#breeze-release-management-generate-constraints-line-18)">(TEXT)                     [...] </text><text class="breeze-release-management-generate-constraints-r5" x="0" y="483.6" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-19)">│</text><text class="breeze-release-management-generate-constraints-r4" x="24.4" y="483.6" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-19)">-</text><text class="breeze-release-management-generate-constraints-r4" x="36.6" y="483.6" textLength="85.4" clip-path="url(#breeze- [...] </text><text class="breeze-release-management-generate-constraints-r5" x="0" y="508" textLength="1464" clip-path="url(#breeze-release-management-generate-constraints-line-20)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text class="breeze-release-management-generate-constraints-r1" x="1464" y="508" textLength="12.2" clip-path="url(#breeze-release-management-generate-constraints-line-20)"> diff --git a/scripts/in_container/_in_container_script_init.sh b/scripts/in_container/_in_container_script_init.sh index 1446839a4a..2e96e03f7a 100755 --- a/scripts/in_container/_in_container_script_init.sh +++ b/scripts/in_container/_in_container_script_init.sh @@ -25,6 +25,6 @@ IN_CONTAINER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" . 
"${IN_CONTAINER_DIR}/_in_container_utils.sh" if [[ ${IN_CONTAINER_INITIALIZED=} != "true" ]]; then in_container_set_colors - in_container_basic_sanity_check + in_container_basic_check in_container_script_start fi diff --git a/scripts/in_container/_in_container_utils.sh b/scripts/in_container/_in_container_utils.sh index fa6a73cba4..9b8ed1679b 100644 --- a/scripts/in_container/_in_container_utils.sh +++ b/scripts/in_container/_in_container_utils.sh @@ -60,7 +60,7 @@ function in_container_go_to_airflow_sources() { pushd "${AIRFLOW_SOURCES}" >/dev/null 2>&1 || exit 1 } -function in_container_basic_sanity_check() { +function in_container_basic_check() { assert_in_container in_container_go_to_airflow_sources } @@ -78,76 +78,6 @@ function dump_airflow_logs() { echo "###########################################################################################" } - -function uninstall_all_pip_packages() { - pip uninstall -y -r <(pip freeze) -} - -function install_local_airflow_with_eager_upgrade() { - local extras - extras="${1}" - # we add eager requirements to make sure to take into account limitations that will allow us to - # install all providers - # shellcheck disable=SC2086 - pip install ".${extras}" ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} \ - --upgrade --upgrade-strategy eager -} - - -function install_all_providers_from_pypi_with_eager_upgrade() { - NO_PROVIDERS_EXTRAS=$(python -c 'import setup; print(",".join(setup.CORE_EXTRAS_DEPENDENCIES))') - ALL_PROVIDERS_PACKAGES=$(python -c 'import setup; print(setup.get_all_provider_packages())') - local packages_to_install=() - local provider_package - local res - local chicken_egg_prefixes - chicken_egg_prefixes="" - if [[ ${CHICKEN_EGG_PROVIDERS=} != "" ]]; then - echo "${COLOR_BLUE}Finding providers to install from dist: ${CHICKEN_EGG_PROVIDERS}${COLOR_RESET}" - for chicken_egg_provider in ${CHICKEN_EGG_PROVIDERS} - do - chicken_egg_prefixes="${chicken_egg_prefixes} apache-airflow-providers-${chicken_egg_provider//./-}" 
- done - echo "${COLOR_BLUE}Chicken egg prefixes: ${chicken_egg_prefixes}${COLOR_RESET}" - ls /dist/ - fi - for provider_package in ${ALL_PROVIDERS_PACKAGES} - do - if [[ "${chicken_egg_prefixes}" == *"${provider_package}"* ]]; then - # add the provider prepared in dist folder where chicken - egg problem is mitigated - for file in /dist/"${provider_package//-/_}"*.whl - do - packages_to_install+=( "${file}" ) - echo "Added ${file} from dist folder as this is a chicken-egg package ${COLOR_GREEN}OK${COLOR_RESET}" - done - continue - fi - echo -n "Checking if ${provider_package} is available in PyPI: " - res=$(curl --head -s -o /dev/null -w "%{http_code}" "https://pypi.org/project/${provider_package}/") - if [[ ${res} == "200" ]]; then - packages_to_install+=( "${provider_package}" ) - echo "${COLOR_GREEN}OK${COLOR_RESET}" - else - echo "${COLOR_YELLOW}Skipped${COLOR_RESET}" - fi - done - - - echo "Installing provider packages: ${packages_to_install[*]}" - - - # we add eager requirements to make sure to take into account limitations that will allow us to - # install all providers. We install only those packages that are available in PyPI - we might - # Have some new providers in the works and they might not yet be simply available in PyPI - # Installing it with Airflow makes sure that the version of package that matches current - # Airflow requirements will be used. 
- # shellcheck disable=SC2086 - set -x - pip install ".[${NO_PROVIDERS_EXTRAS}]" "${packages_to_install[@]}" ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} \ - --upgrade --upgrade-strategy eager - set +x -} - function in_container_set_colors() { COLOR_BLUE=$'\e[34m' COLOR_GREEN=$'\e[32m' @@ -161,25 +91,5 @@ function in_container_set_colors() { export COLOR_YELLOW } - -# Starts group for GitHub Actions - makes logs much more readable -function group_start { - if [[ ${GITHUB_ACTIONS:="false"} == "true" || ${GITHUB_ACTIONS} == "True" ]]; then - echo "::group::${1}" - else - echo - echo "${1}" - echo - fi -} - -# Ends group for GitHub Actions -function group_end { - if [[ ${GITHUB_ACTIONS:="false"} == "true" || ${GITHUB_ACTIONS} == "True" ]]; then - echo -e "\033[0m" # Disable any colors set in the group - echo "::endgroup::" - fi -} - export CI=${CI:="false"} export GITHUB_ACTIONS=${GITHUB_ACTIONS:="false"} diff --git a/scripts/in_container/in_container_utils.py b/scripts/in_container/in_container_utils.py new file mode 100644 index 0000000000..6e41545673 --- /dev/null +++ b/scripts/in_container/in_container_utils.py @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import shlex +import subprocess +from contextlib import contextmanager + +import rich_click as click +from rich.console import Console + +click.rich_click.COLOR_SYSTEM = "standard" +console = Console(width=400, color_system="standard") + + +@contextmanager +def ci_group(group_name: str, github_actions: bool): + if github_actions: + console.print(f"::group::{group_name[:200]}[/]", markup=False) + console.print(group_name, markup=False) + try: + yield + finally: + if github_actions: + console.print("::endgroup::") + + +def run_command(cmd, github_actions: bool, **kwargs) -> subprocess.CompletedProcess: + with ci_group( + f"Running command: {' '.join([shlex.quote(arg) for arg in cmd])}", github_actions=github_actions + ): + result = subprocess.run(cmd, **kwargs) + if result.returncode != 0 and github_actions: + console.print(f"[red]Command failed: {' '.join([shlex.quote(entry) for entry in cmd])}[/]") + console.print("[red]Please unfold the above group and to investigate the issue[/]") + return result diff --git a/scripts/in_container/install_airflow_and_providers.py b/scripts/in_container/install_airflow_and_providers.py index b434de53a0..9eadad9789 100755 --- a/scripts/in_container/install_airflow_and_providers.py +++ b/scripts/in_container/install_airflow_and_providers.py @@ -20,47 +20,16 @@ from __future__ import annotations import re -import shlex -import subprocess import sys -from contextlib import contextmanager from pathlib import Path from typing import NamedTuple -import rich_click as click -from rich.console import Console - -console = Console(width=400, color_system="standard") - -click.rich_click.COLOR_SYSTEM = "standard" +from in_container_utils import click, console, run_command AIRFLOW_SOURCE_DIR = Path(__file__).resolve().parents[1] DIST_FOLDER = Path("/dist") -@contextmanager -def ci_group(group_name: str, github_actions: bool): - if github_actions: - console.print(f"::group::{group_name[:200]}[/]", 
markup=False) - console.print(group_name, markup=False) - try: - yield - finally: - if github_actions: - console.print("::endgroup::") - - -def run_command(cmd, github_actions: bool, **kwargs) -> subprocess.CompletedProcess: - with ci_group( - f"Running command: {' '.join([shlex.quote(arg) for arg in cmd])}", github_actions=github_actions - ): - result = subprocess.run(cmd, **kwargs) - if result.returncode != 0 and github_actions: - console.print(f"[red]Command failed: {' '.join([shlex.quote(entry) for entry in cmd])}[/]") - console.print("[red]Please unfold the above group and to investigate the issue[/]") - return result - - def get_provider_name(package_name: str) -> str: return ".".join(package_name.split("-")[0].replace("apache_airflow_providers_", "").split("_")) diff --git a/scripts/in_container/run_generate_constraints.py b/scripts/in_container/run_generate_constraints.py new file mode 100755 index 0000000000..1bebdf8ac5 --- /dev/null +++ b/scripts/in_container/run_generate_constraints.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import os +import sys +from dataclasses import dataclass +from datetime import datetime +from functools import cached_property +from pathlib import Path +from typing import TextIO + +import requests +from click import Choice +from in_container_utils import click, console, run_command + +AIRFLOW_SOURCES = Path(__file__).resolve().parents[2] +DEFAULT_BRANCH = os.environ.get("DEFAULT_BRANCH", "main") +PYTHON_VERSION = os.environ.get("PYTHON_MAJOR_MINOR_VERSION", "3.8") + +now = datetime.now().isoformat() + +NO_PROVIDERS_CONSTRAINTS_PREFIX = f""" +# +# This constraints file was automatically generated on {now} +# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow. +# This variant of constraints install just the 'bare' 'apache-airflow' package build from the HEAD of +# the branch, without installing any of the providers. +# +# Those constraints represent the "newest" dependencies airflow could use, if providers did not limit +# Airflow in any way. +# +""" + +SOURCE_PROVIDERS_CONSTRAINTS_PREFIX = f""" +# +# This constraints file was automatically generated on {now} +# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow. +# This variant of constraints install uses the HEAD of the branch version of both +# 'apache-airflow' package and all available community provider packages. +# +# Those constraints represent the dependencies that are used by all pull requests when they are build in CI. +# They represent "latest" and greatest set of constraints that HEAD of the "apache-airflow" package should +# Install with "HEAD" of providers. Those are the only constraints that are used by our CI builds. +# +""" + +PYPI_PROVIDERS_CONSTRAINTS_PREFIX = f""" +# +# This constraints file was automatically generated on {now} +# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow. 
+# This variant of constraints install uses the HEAD of the branch version for 'apache-airflow' but installs +# the providers from PIP-released packages at the moment of the constraint generation. +# +# Those constraints are actually those that regular users use to install released version of Airflow. +# We also use those constraints after "apache-airflow" is released and the constraints are tagged with +# "constraints-X.Y.Z" tag to build the production image for that version. +# +# This constraints file is meant to be used only in the "apache-airflow" installation command and not +# in all subsequent pip commands. By using a constraints.txt file, we ensure that solely the Airflow +# installation step is reproducible. Subsequent pip commands may install packages that would have +# been incompatible with the constraints used in Airflow reproducible installation step. Finally, pip +# commands that might change the installed version of apache-airflow should include "apache-airflow==X.Y.Z" +# in the list of install targets to prevent Airflow accidental upgrade or downgrade. +# +# Typical installation process of airflow for Python 3.8 is (with random selection of extras and custom +# dependencies added), usually consists of two steps: +# +# 1. Reproducible installation of airflow with selected providers (note constraints are used): +# +# pip install "apache-airflow[celery,cncf.kubernetes,google,amazon,snowflake]==X.Y.Z" \\ +# --constraint \\ +# "https://raw.githubusercontent.com/apache/airflow/constraints-X.Y.Z/constraints-{PYTHON_VERSION}.txt" +# +# 2. Installing own dependencies that are potentially not matching the constraints (note constraints are not +# used, and apache-airflow==X.Y.Z is used to make sure there is no accidental airflow upgrade/downgrade. 
+# +# pip install "apache-airflow==X.Y.Z" "snowflake-connector-python[pandas]=N.M.O" +# +""" + + +@dataclass +class ConfigParams: + airflow_constraints_mode: str + chicken_egg_providers: str + constraints_github_repository: str + default_constraints_branch: str + github_actions: bool + eager_upgrade_additional_requirements: str + python: str + + @cached_property + def constraints_dir(self) -> Path: + constraints_dir = Path("/files") / self.python + constraints_dir.mkdir(parents=True, exist_ok=True) + return constraints_dir + + @cached_property + def latest_constraints_file(self) -> Path: + return self.constraints_dir / f"original-{self.airflow_constraints_mode}-{self.python}.txt" + + @cached_property + def constraints_diff_file(self) -> Path: + return self.constraints_dir / f"diff-{self.airflow_constraints_mode}-{self.python}.txt" + + @cached_property + def current_constraints_file(self) -> Path: + return self.constraints_dir / f"{self.airflow_constraints_mode}-{self.python}.txt" + + +def install_local_airflow_with_eager_upgrade( + config_params: ConfigParams, eager_upgrade_additional_requirements: str, extras: list[str] +) -> None: + run_command( + [ + "pip", + "install", + "--root-user-action", + "ignore", + f".[{','.join(extras)}]", + *eager_upgrade_additional_requirements.split(" "), + "--upgrade", + "--upgrade-strategy", + "eager", + ], + github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + check=True, + ) + + +def freeze_packages_to_file(config_params: ConfigParams, file: TextIO) -> None: + console.print(f"[bright_blue]Freezing constraints to file: {file.name}") + result = run_command( + ["pip", "freeze"], + github_actions=config_params.github_actions, + text=True, + check=True, + capture_output=True, + ) + for line in sorted(result.stdout.split("\n")): + if line.startswith(("apache_airflow", "apache-airflow==", "/opt/airflow")): + continue + if "@" in line: + continue + file.write(line) + file.write("\n") + console.print(f"[green]Constraints 
generated to file: {file.name}") + + +def download_latest_constraint_file(config_params: ConfigParams): + constraints_url = ( + "https://raw.githubusercontent.com/" + f"{config_params.constraints_github_repository}/{config_params.default_constraints_branch}/" + f"{config_params.airflow_constraints_mode}-{config_params.python}.txt" + ) + # download the latest constraints file + # download using requests + console.print(f"[bright_blue]Downloading constraints file from {constraints_url}") + r = requests.get(constraints_url, timeout=60) + r.raise_for_status() + with config_params.latest_constraints_file.open("w") as constraints_file: + constraints_file.write(r.text) + console.print(f"[green]Downloaded constraints file from {constraints_url} to {constraints_file.name}") + + +def diff_constraints(config_params: ConfigParams) -> None: + """ + Diffs constraints files and prints the diff to the console. + """ + console.print("[bright_blue]Diffing constraints files") + result = run_command( + [ + "diff", + "--ignore-matching-lines=#", + "--color=always", + config_params.latest_constraints_file.as_posix(), + config_params.current_constraints_file.as_posix(), + ], + github_actions=config_params.github_actions, + check=False, + ) + if result.returncode == 0: + console.print("[green]No changes in constraints files. 
exiting") + config_params.current_constraints_file.unlink(missing_ok=True) + sys.exit(0) + result = run_command( + [ + "diff", + "--ignore-matching-lines=#", + "--color=never", + config_params.latest_constraints_file.as_posix(), + config_params.current_constraints_file.as_posix(), + ], + github_actions=config_params.github_actions, + check=False, + text=True, + capture_output=True, + ) + with config_params.constraints_diff_file.open("w") as diff_file: + diff_file.write( + f"Dependencies {config_params.airflow_constraints_mode} updated " + f"for Python {config_params.python}\n\n" + ) + diff_file.write("```diff\n") + diff_file.write(result.stdout) + diff_file.write("```\n") + console.print(f"[green]Diff generated to file: {config_params.constraints_diff_file}") + + +def uninstall_all_packages(config_params: ConfigParams): + console.print("[bright_blue]Uninstall All PIP packages") + result = run_command( + ["pip", "freeze"], + github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + text=True, + check=True, + capture_output=True, + ) + all_installed_packages = [ + dep.split("==")[0] + for dep in result.stdout.strip().split("\n") + if not dep.startswith(("apache-airflow", "apache-airflow==", "/opt/airflow", "#", "-e")) + ] + run_command( + ["pip", "uninstall", "--root-user-action", "ignore", "-y", *all_installed_packages], + github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + text=True, + check=True, + ) + + +def get_core_airflow_dependencies(config_params) -> list[str]: + result = run_command( + ["python", "-c", "import setup; print(','.join(setup.CORE_EXTRAS_DEPENDENCIES.keys()))"], + github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + text=True, + check=True, + capture_output=True, + ) + return result.stdout.strip().split(",") + + +def get_all_provider_packages(config_params) -> list[str]: + result = run_command( + ["python", "-c", "import setup; print(setup.get_all_provider_packages())"], + 
github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + text=True, + check=True, + capture_output=True, + ) + return result.stdout.strip().split(" ") + + +def generate_constraints_source_providers(config_params: ConfigParams) -> None: + """ + Generates constraints with provider dependencies used from current sources. This might be different + from the constraints generated from the latest released version of the providers in PyPI. Those + constraints are used in CI builds when we install providers built using current sources and in + Breeze CI image builds. + """ + with config_params.current_constraints_file.open("w") as constraints_file: + constraints_file.write(SOURCE_PROVIDERS_CONSTRAINTS_PREFIX) + freeze_packages_to_file(config_params, constraints_file) + download_latest_constraint_file(config_params) + diff_constraints(config_params) + + +def generate_constraints_pypi_providers(config_params: ConfigParams) -> None: + """ + Generates constraints with provider installed from PyPI. This is the default constraints file + used in production/release builds when we install providers from PyPI and when tagged, those + providers are used by our users to install Airflow in reproducible way. 
+ :return: + """ + dist_dir = Path("/dist") + core_dependencies = get_core_airflow_dependencies(config_params) + all_provider_packages = get_all_provider_packages(config_params) + chicken_egg_prefixes = [] + packages_to_install = [] + console.print("[bright_blue]Installing Airflow with PyPI providers with eager upgrade") + if config_params.chicken_egg_providers: + for chicken_egg_provider in config_params.chicken_egg_providers.split(" "): + chicken_egg_prefixes.append(f"apache-airflow-providers-{chicken_egg_provider.replace('.','-')}") + console.print( + f"[bright_blue]Checking if {chicken_egg_prefixes} are available in local dist folder " + f"as chicken egg providers)" + ) + for provider_package in all_provider_packages: + if config_params.chicken_egg_providers and provider_package.startswith(tuple(chicken_egg_prefixes)): + glob_pattern = f"{provider_package.replace('-','_')}-*.whl" + console.print( + f"[bright_blue]Checking if {provider_package} is available in local dist folder " + f"with {glob_pattern} pattern" + ) + files = dist_dir.glob(glob_pattern) + for file in files: + console.print( + f"[yellow]Installing {file.name} from local dist folder as it is " + f"a chicken egg provider" + ) + packages_to_install.append(file.as_posix()) + else: + console.print( + f"[yellow]Skipping {provider_package} as it is not found in dist folder to install." + ) + continue + console.print(f"[bright_blue]Checking if {provider_package} is available in PyPI: ... ", end="") + r = requests.head(f"https://pypi.org/pypi/{provider_package}/json", timeout=60) + if r.status_code == 200: + console.print("[green]OK") + packages_to_install.append(provider_package) + else: + console.print("[yellow]NOK. 
Skipping.") + run_command( + cmd=[ + "pip", + "install", + "--root-user-action", + "ignore", + f".[{','.join(core_dependencies)}]", + *packages_to_install, + *config_params.eager_upgrade_additional_requirements.split(" "), + "--upgrade", + "--upgrade-strategy", + "eager", + ], + github_actions=config_params.github_actions, + check=True, + ) + console.print("[success]Installed airflow with PyPI providers with eager upgrade.") + with config_params.current_constraints_file.open("w") as constraints_file: + constraints_file.write(PYPI_PROVIDERS_CONSTRAINTS_PREFIX) + freeze_packages_to_file(config_params, constraints_file) + download_latest_constraint_file(config_params) + diff_constraints(config_params)
+
+
+def generate_constraints_no_providers(config_params: ConfigParams) -> None:
+    """
+    Generates constraints without any provider dependencies. This is used mostly to generate SBOM
+    files - where we generate list of dependencies for Airflow without any provider installed.
+
+    The current constraints file is rewritten from scratch: it starts with
+    ``NO_PROVIDERS_CONSTRAINTS_PREFIX`` and is followed by whatever ``freeze_packages_to_file``
+    writes to it. Afterwards ``download_latest_constraint_file`` and ``diff_constraints`` are called.
+
+    :param config_params: parameters of this run. ``eager_upgrade_additional_requirements`` and
+        ``current_constraints_file`` are read here; the object is also passed to every helper.
+    :return: None
+    """
+    core_dependencies = get_core_airflow_dependencies(config_params)
+    # Remove everything first - these constraints must describe Airflow without any provider installed.
+    uninstall_all_packages(config_params)
+    # NOTE(review): core_dependencies is interpolated as-is into both messages below, while the PyPI
+    # variant joins it with "," - the output shows its repr if it is a list. Confirm this is intended.
+    console.print(
+        f"[bright_blue]Installing airflow with [{core_dependencies}] extras only with eager upgrade."
+    )
+    install_local_airflow_with_eager_upgrade(
+        config_params, config_params.eager_upgrade_additional_requirements, core_dependencies
+    )
+    console.print(f"[success]Installed airflow with [{core_dependencies}] extras only with eager upgrade.")
+    # Mode "w" truncates any previous file, so the prefix is always the first thing in it.
+    with config_params.current_constraints_file.open("w") as constraints_file:
+        constraints_file.write(NO_PROVIDERS_CONSTRAINTS_PREFIX)
+        freeze_packages_to_file(config_params, constraints_file)
+    # NOTE(review): indentation was reconstructed from whitespace-collapsed text; the two calls below
+    # are assumed to run after the constraints file has been closed - confirm against the repository.
+    download_latest_constraint_file(config_params)
+    diff_constraints(config_params)
+
+
+# Modes accepted by --airflow-constraints-mode; generate_constraints() dispatches on these values.
+ALLOWED_CONSTRAINTS_MODES = ["constraints", "constraints-source-providers", "constraints-no-providers"]
+
+
+# Every option can also be supplied through the environment variable named in its ``envvar``.
+# The docstring of the decorated function is what click shows as the command's --help text.
+@click.command()
+@click.option(
+    "--airflow-constraints-mode",
+    type=Choice(ALLOWED_CONSTRAINTS_MODES),
+    required=True,
+    envvar="AIRFLOW_CONSTRAINTS_MODE",
+    help="Mode of constraints to generate",
+)
+@click.option(
+    "--chicken-egg-providers",
+    envvar="CHICKEN_EGG_PROVIDERS",
+    help="Providers that should be installed from packages built from current sources.",
+)
+@click.option(
+    "--constraints-github-repository",
+    default="apache/airflow",
+    show_default=True,
+    envvar="CONSTRAINTS_GITHUB_REPOSITORY",
+    help="GitHub repository to get constraints from",
+)
+@click.option(
+    "--default-constraints-branch",
+    default="constraints-main",
+    show_default=True,
+    envvar="DEFAULT_CONSTRAINTS_BRANCH",
+    help="Branch to get constraints from",
+)
+@click.option(
+    "--eager-upgrade-additional-requirements",
+    envvar="EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS",
+    help="Additional requirements to add to eager upgrade",
+)
+@click.option(
+    "--github-actions",
+    is_flag=True,
+    default=False,
+    show_default=True,
+    envvar="GITHUB_ACTIONS",
+    help="Running in GitHub Actions",
+)
+@click.option(
+    "--python",
+    required=True,
+    envvar="PYTHON_MAJOR_MINOR_VERSION",
+    help="Python major.minor version",
+)
+def generate_constraints(
+    airflow_constraints_mode: str,
+    # NOTE(review): this option has no default and is not required - presumably None when neither
+    # the flag nor the env var is set; confirm the code consuming it handles that.
+    chicken_egg_providers: str,
+    constraints_github_repository: str,
+    default_constraints_branch: str,
+    # NOTE(review): same as chicken_egg_providers - may arrive as None; confirm before calling .split().
+    eager_upgrade_additional_requirements: str,
+    github_actions: bool,
+    python: str,
+) -> None:
+    """Generate constraints for the selected constraints mode."""
+    # Bundle the command-line values so that each generator receives a single object.
+    config_params = ConfigParams(
+        airflow_constraints_mode=airflow_constraints_mode,
+        chicken_egg_providers=chicken_egg_providers,
+        constraints_github_repository=constraints_github_repository,
+        default_constraints_branch=default_constraints_branch,
+        eager_upgrade_additional_requirements=eager_upgrade_additional_requirements,
+        github_actions=github_actions,
+        python=python,
+    )
+    if airflow_constraints_mode == "constraints-source-providers":
+        generate_constraints_source_providers(config_params)
+    elif airflow_constraints_mode == "constraints":
+        generate_constraints_pypi_providers(config_params)
+    elif airflow_constraints_mode == "constraints-no-providers":
+        generate_constraints_no_providers(config_params)
+    else:
+        # Defensive fallback: the Choice type on --airflow-constraints-mode already restricts the
+        # value to ALLOWED_CONSTRAINTS_MODES when the function is invoked through click.
+        console.print(f"[red]Unknown constraints mode: {airflow_constraints_mode}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    generate_constraints()
diff --git a/scripts/in_container/run_generate_constraints.sh b/scripts/in_container/run_generate_constraints.sh deleted file mode 100755 index 2c969c5db5..0000000000 --- a/scripts/in_container/run_generate_constraints.sh +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied.
See the License for the -# specific language governing permissions and limitations -# under the License. -# shellcheck source=scripts/in_container/_in_container_script_init.sh -. "$( dirname "${BASH_SOURCE[0]}" )/_in_container_script_init.sh" - -CONSTRAINTS_DIR="/files/constraints-${PYTHON_MAJOR_MINOR_VERSION}" - -LATEST_CONSTRAINT_FILE="${CONSTRAINTS_DIR}/original-${AIRFLOW_CONSTRAINTS_MODE}-${PYTHON_MAJOR_MINOR_VERSION}.txt" -CONSTRAINTS_MARKDOWN_DIFF="${CONSTRAINTS_DIR}/diff-${AIRFLOW_CONSTRAINTS_MODE}-${PYTHON_MAJOR_MINOR_VERSION}.md" -mkdir -pv "${CONSTRAINTS_DIR}" - - -if [[ ${AIRFLOW_CONSTRAINTS_MODE} == "constraints-no-providers" ]]; then - NO_PROVIDERS_EXTRAS=$(python -c 'import setup; print(",".join(setup.CORE_EXTRAS_DEPENDENCIES.keys()))') - CURRENT_CONSTRAINT_FILE="${CONSTRAINTS_DIR}/${AIRFLOW_CONSTRAINTS_MODE}-${PYTHON_MAJOR_MINOR_VERSION}.txt" - echo - echo "UnInstall All PIP packages." - echo - uninstall_all_pip_packages - echo - echo "Install airflow with [${NO_PROVIDERS_EXTRAS}] extras only (uninstall all packages first)." - echo - install_local_airflow_with_eager_upgrade "[${NO_PROVIDERS_EXTRAS}]" - cat <<EOF >"${CURRENT_CONSTRAINT_FILE}" -# -# This constraints file was automatically generated on $(date -u +'%Y-%m-%dT%H:%M:%SZ') -# via "eager-upgrade" mechanism of PIP. For the "${DEFAULT_BRANCH}" branch of Airflow. -# This variant of constraints install just the 'bare' 'apache-airflow' package build from the HEAD of -# the branch, without installing any of the providers. -# -# Those constraints represent the "newest" dependencies airflow could use, if providers did not limit -# Airflow in any way. -# -EOF -elif [[ ${AIRFLOW_CONSTRAINTS_MODE} == "constraints-source-providers" ]]; then - CURRENT_CONSTRAINT_FILE="${CONSTRAINTS_DIR}/${AIRFLOW_CONSTRAINTS_MODE}-${PYTHON_MAJOR_MINOR_VERSION}.txt" - echo - echo "Providers are already installed from sources." 
- echo - cat <<EOF >"${CURRENT_CONSTRAINT_FILE}" -# -# This constraints file was automatically generated on $(date -u +'%Y-%m-%dT%H:%M:%SZ') -# via "eager-upgrade" mechanism of PIP. For the "${DEFAULT_BRANCH}" branch of Airflow. -# This variant of constraints install uses the HEAD of the branch version of both -# 'apache-airflow' package and all available community provider packages. -# -# Those constraints represent the dependencies that are used by all pull requests when they are build in CI. -# They represent "latest" and greatest set of constraints that HEAD of the "apache-airflow" package should -# Install with "HEAD" of providers. Those are the only constraints that are used by our CI builds. -# -EOF -elif [[ ${AIRFLOW_CONSTRAINTS_MODE} == "constraints" ]]; then - CURRENT_CONSTRAINT_FILE="${CONSTRAINTS_DIR}/${AIRFLOW_CONSTRAINTS_MODE}-${PYTHON_MAJOR_MINOR_VERSION}.txt" - echo - echo "Install all providers from PyPI so that they are included in the constraints." - echo - install_all_providers_from_pypi_with_eager_upgrade - cat <<EOF >"${CURRENT_CONSTRAINT_FILE}" -# -# This constraints file was automatically generated on $(date -u +'%Y-%m-%dT%H:%M:%SZ') -# via "eager-upgrade" mechanism of PIP. For the "${DEFAULT_BRANCH}" branch of Airflow. -# This variant of constraints install uses the HEAD of the branch version for 'apache-airflow' but installs -# the providers from PIP-released packages at the moment of the constraint generation. -# -# Those constraints are actually those that regular users use to install released version of Airflow. -# We also use those constraints after "apache-airflow" is released and the constraints are tagged with -# "constraints-X.Y.Z" tag to build the production image for that version. -# -# This constraints file is meant to be used only in the "apache-airflow" installation command and not -# in all subsequent pip commands. By using a constraints.txt file, we ensure that solely the Airflow -# installation step is reproducible. 
Subsequent pip commands may install packages that would have -# been incompatible with the constraints used in Airflow reproducible installation step. Finally, pip -# commands that might change the installed version of apache-airflow should include "apache-airflow==X.Y.Z" -# in the list of install targets to prevent Airflow accidental upgrade or downgrade. -# -# Typical installation process of airflow for Python 3.8 is (with random selection of extras and custom -# dependencies added), usually consists of two steps: -# -# 1. Reproducible installation of airflow with selected providers (note constraints are used): -# -# pip install "apache-airflow[celery,cncf.kubernetes,google,amazon,snowflake]==X.Y.Z" \\ -# --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-X.Y.Z/constraints-3.8.txt" -# -# 2. Installing own dependencies that are potentially not matching the constraints (note constraints are not -# used, and apache-airflow==X.Y.Z is used to make sure there is no accidental airflow upgrade/downgrade. -# -# pip install "apache-airflow==X.Y.Z" "snowflake-connector-python[pandas]==2.9.0" -# -EOF -else - echo - echo "${COLOR_RED}Error! 
AIRFLOW_CONSTRAINTS_MODE has wrong value: '${AIRFLOW_CONSTRAINTS_MODE}' ${COLOR_RESET}" - echo - exit 1 -fi - -readonly AIRFLOW_CONSTRAINTS_MODE - -CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${DEFAULT_CONSTRAINTS_BRANCH}/${AIRFLOW_CONSTRAINTS_MODE}-${PYTHON_MAJOR_MINOR_VERSION}.txt" -readonly CONSTRAINTS_LOCATION - -touch "${LATEST_CONSTRAINT_FILE}" -curl --connect-timeout 60 --max-time 60 "${CONSTRAINTS_LOCATION}" --output "${LATEST_CONSTRAINT_FILE}" || true - -echo -echo "Freezing constraints to ${CURRENT_CONSTRAINT_FILE}" -echo - -pip freeze | sort | \ - grep -v "apache_airflow" | \ - grep -v "apache-airflow==" | \ - grep -v "@" | \ - grep -v "/opt/airflow" >>"${CURRENT_CONSTRAINT_FILE}" - -echo -echo "Constraints generated in ${CURRENT_CONSTRAINT_FILE}" -echo - -set +e -if diff "--ignore-matching-lines=#" --color=always "${LATEST_CONSTRAINT_FILE}" "${CURRENT_CONSTRAINT_FILE}"; then - echo - echo "${COLOR_GREEN}No changes in constraints - exiting${COLOR_RESET}" - echo - rm -f "${CONSTRAINTS_MARKDOWN_DIFF}" - exit 0 -fi - -cat <<EOF >"${CONSTRAINTS_MARKDOWN_DIFF}" -# Dependencies updated for Python ${PYTHON_MAJOR_MINOR_VERSION} - -\`\`\`diff -$(diff --unified=0 --ignore-matching-lines=# "${LATEST_CONSTRAINT_FILE}" "${CURRENT_CONSTRAINT_FILE}") -\`\`\` -EOF - -echo -echo "Constraints error markdown generated in ${CONSTRAINTS_MARKDOWN_DIFF}" -echo - -ls "${CONSTRAINTS_MARKDOWN_DIFF}" - -exit 0 diff --git a/scripts/in_container/run_system_tests.sh b/scripts/in_container/run_system_tests.sh index b708e8a2cf..eb9d541844 100755 --- a/scripts/in_container/run_system_tests.sh +++ b/scripts/in_container/run_system_tests.sh @@ -26,9 +26,7 @@ IN_CONTAINER_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) . "${IN_CONTAINER_DIR}/_in_container_utils.sh" in_container_set_colors - -in_container_basic_sanity_check - +in_container_basic_check in_container_script_start # any argument received is overriding the default nose execution arguments:
