Lee-W commented on code in PR #36158: URL: https://github.com/apache/airflow/pull/36158#discussion_r1421835178
########## scripts/in_container/in_container_utils.py: ########## @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import shlex +import subprocess +from contextlib import contextmanager + +import rich_click as click +from rich.console import Console + +click.rich_click.COLOR_SYSTEM = "standard" +console = Console(width=400, color_system="standard") + + +@contextmanager +def ci_group(group_name: str, github_actions: bool): + if github_actions: + console.print(f"::group::{group_name[:200]}[/]", markup=False) + console.print(group_name, markup=False) + try: + yield + finally: + if github_actions: + console.print("::endgroup::") + + +def run_command(cmd, github_actions: bool, **kwargs) -> subprocess.CompletedProcess: Review Comment: ```suggestion def run_command(cmd: list[str], github_actions: bool, **kwargs) -> subprocess.CompletedProcess: ``` ########## scripts/in_container/run_generate_constraints.py: ########## @@ -0,0 +1,460 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import os +import sys +from dataclasses import dataclass +from datetime import datetime +from functools import cached_property +from pathlib import Path +from typing import TextIO + +import requests +from click import Choice +from in_container_utils import click, console, run_command + +AIRFLOW_SOURCES = Path(__file__).resolve().parents[2] +DEFAULT_BRANCH = os.environ.get("DEFAULT_BRANCH", "main") +PYTHON_VERSION = os.environ.get("PYTHON_MAJOR_MINOR_VERSION", "3.8") + +now = datetime.now().isoformat() + +NO_PROVIDERS_CONSTRAINTS_PREFIX = f""" +# +# This constraints file was automatically generated on {now} +# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow. +# This variant of constraints install just the 'bare' 'apache-airflow' package build from the HEAD of +# the branch, without installing any of the providers. +# +# Those constraints represent the "newest" dependencies airflow could use, if providers did not limit +# Airflow in any way. +# +""" + +SOURCE_PROVIDERS_CONSTRAINTS_PREFIX = f""" +# +# This constraints file was automatically generated on {now} +# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow. +# This variant of constraints install uses the HEAD of the branch version of both +# 'apache-airflow' package and all available community provider packages. 
+# +# Those constraints represent the dependencies that are used by all pull requests when they are build in CI. +# They represent "latest" and greatest set of constraints that HEAD of the "apache-airflow" package should +# Install with "HEAD" of providers. Those are the only constraints that are used by our CI builds. +# +""" + +PYPI_PROVIDERS_CONSTRAINTS_PREFIX = f""" +# +# This constraints file was automatically generated on {now} +# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow. +# This variant of constraints install uses the HEAD of the branch version for 'apache-airflow' but installs +# the providers from PIP-released packages at the moment of the constraint generation. +# +# Those constraints are actually those that regular users use to install released version of Airflow. +# We also use those constraints after "apache-airflow" is released and the constraints are tagged with +# "constraints-X.Y.Z" tag to build the production image for that version. +# +# This constraints file is meant to be used only in the "apache-airflow" installation command and not +# in all subsequent pip commands. By using a constraints.txt file, we ensure that solely the Airflow +# installation step is reproducible. Subsequent pip commands may install packages that would have +# been incompatible with the constraints used in Airflow reproducible installation step. Finally, pip +# commands that might change the installed version of apache-airflow should include "apache-airflow==X.Y.Z" +# in the list of install targets to prevent Airflow accidental upgrade or downgrade. +# +# Typical installation process of airflow for Python 3.8 is (with random selection of extras and custom +# dependencies added), usually consists of two steps: +# +# 1. 
Reproducible installation of airflow with selected providers (note constraints are used): +# +# pip install "apache-airflow[celery,cncf.kubernetes,google,amazon,snowflake]==X.Y.Z" \\ +# --constraint \\ +# "https://raw.githubusercontent.com/apache/airflow/constraints-X.Y.Z/constraints-{PYTHON_VERSION}.txt" +# +# 2. Installing own dependencies that are potentially not matching the constraints (note constraints are not +# used, and apache-airflow==X.Y.Z is used to make sure there is no accidental airflow upgrade/downgrade. +# +# pip install "apache-airflow==X.Y.Z" "snowflake-connector-python[pandas]=N.M.O" +# +""" + + +@dataclass +class ConfigParams: + airflow_constraints_mode: str + chicken_egg_providers: str + constraints_github_repository: str + default_constraints_branch: str + github_actions: bool + eager_upgrade_additional_requirements: str + python: str + + @cached_property + def constraints_dir(self) -> Path: + constraints_dir = Path("/files") / f"constraints-{self.python}" + constraints_dir.mkdir(parents=True, exist_ok=True) + return constraints_dir + + @cached_property + def latest_constraints_file(self) -> Path: + return self.constraints_dir / f"original-{self.airflow_constraints_mode}-{self.python}.txt" + + @cached_property + def constraints_diff_file(self) -> Path: + return self.constraints_dir / f"diff-{self.airflow_constraints_mode}-{self.python}.md" + + @cached_property + def current_constraints_file(self) -> Path: + return self.constraints_dir / f"{self.airflow_constraints_mode}-{self.python}.txt" + + +def install_local_airflow_with_eager_upgrade( + config_params: ConfigParams, eager_upgrade_additional_requirements: str, extras: list[str] +) -> None: + run_command( + [ + "pip", + "install", + "--root-user-action", + "ignore", + f".[{','.join(extras)}]", + *eager_upgrade_additional_requirements.split(" "), + "--upgrade", + "--upgrade-strategy", + "eager", + ], + github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + check=True, + ) + + 
+def freeze_packages_to_file(config_params: ConfigParams, file: TextIO) -> None: + console.print(f"[bright_blue]Freezing constraints to file: {file.name}") + result = run_command( + ["pip", "freeze"], + github_actions=config_params.github_actions, + text=True, + check=True, + capture_output=True, + ) + for line in sorted(result.stdout.split("\n")): + if line.startswith(("apache_airflow", "apache-airflow==", "/opt/airflow", "#", "-e")): + continue + if "@" in line: + continue + file.write(line) + file.write("\n") + console.print(f"[green]Constraints generated to file: {file.name}") + + +def download_latest_constraint_file(config_params: ConfigParams): + constraints_url = ( + "https://raw.githubusercontent.com/" + f"{config_params.constraints_github_repository}/{config_params.default_constraints_branch}/" + f"{config_params.airflow_constraints_mode}-{config_params.python}.txt" + ) + # download the latest constraints file + # download using requests + console.print(f"[bright_blue]Downloading constraints file from {constraints_url}") + r = requests.get(constraints_url, timeout=60) + r.raise_for_status() + with config_params.latest_constraints_file.open("w") as constraints_file: + constraints_file.write(r.text) + console.print(f"[green]Downloaded constraints file from {constraints_url} to {constraints_file.name}") + + +def diff_constraints(config_params: ConfigParams) -> None: + """ + Diffs constraints files and prints the diff to the console. + """ + console.print("[bright_blue]Diffing constraints files") + result = run_command( + [ + "diff", + "--ignore-matching-lines=#", + "--color=always", + config_params.latest_constraints_file.as_posix(), + config_params.current_constraints_file.as_posix(), + ], + # always shows output directly in CI without folded group + github_actions=False, + check=False, + ) + if result.returncode == 0: + console.print("[green]No changes in constraints files. 
exiting") + config_params.current_constraints_file.unlink(missing_ok=True) + sys.exit(0) + result = run_command( + [ + "diff", + "--ignore-matching-lines=#", + "--color=never", + config_params.latest_constraints_file.as_posix(), + config_params.current_constraints_file.as_posix(), + ], + github_actions=config_params.github_actions, + check=False, + text=True, + capture_output=True, + ) + with config_params.constraints_diff_file.open("w") as diff_file: + diff_file.write( + f"Dependencies {config_params.airflow_constraints_mode} updated " + f"for Python {config_params.python}\n\n" + ) + diff_file.write("```diff\n") + diff_file.write(result.stdout) + diff_file.write("```\n") + console.print(f"[green]Diff generated to file: {config_params.constraints_diff_file}") + + +def uninstall_all_packages(config_params: ConfigParams): + console.print("[bright_blue]Uninstall All PIP packages") + result = run_command( + ["pip", "freeze"], + github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + text=True, + check=True, + capture_output=True, + ) + all_installed_packages = [ + dep.split("==")[0] + for dep in result.stdout.strip().split("\n") + if not dep.startswith(("apache-airflow", "apache-airflow==", "/opt/airflow", "#", "-e")) + ] + run_command( + ["pip", "uninstall", "--root-user-action", "ignore", "-y", *all_installed_packages], + github_actions=config_params.github_actions, + cwd=AIRFLOW_SOURCES, + text=True, + check=True, + ) + + +def get_core_airflow_dependencies(config_params) -> list[str]: + result = run_command( + ["python", "-c", "import setup; print(','.join(setup.CORE_EXTRAS_DEPENDENCIES.keys()))"], Review Comment: If we're now using Python script here, should we just import setup and run it directly instead of running through subprocess? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org. For queries about this service, please contact Infrastructure at: users@infra.apache.org
