This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new d409b8be96 Improve editable airflow installation by adding preinstalled deps (#38764) d409b8be96 is described below commit d409b8be966b51207e10129e7709e943a5dc7ac3 Author: Jarek Potiuk <ja...@potiuk.com> AuthorDate: Mon Apr 8 18:56:17 2024 +0200 Improve editable airflow installation by adding preinstalled deps (#38764) * Improve editable airflow installation by adding preinstalled deps While preparing for presentation about modernizing packaging setup I found out that we can slightly improve editable installation of airflow. So far we required a "devel" installation in order to make a working editable installation of airflow (because the devel extra was installing dependencies for all the pre-installed providers). However that required to synchronize the list of providers installed in `devel` dependency with the list of preinstalled providers, as well as it made `pip install -e .` resulting with a non-working version of Airflow (because it requires `fab` provider dependencies to start airflow webserver). This PR improves it - in editable mode, instead of adding the pre-installed providers, we add their dependencies. This way we can remove the pre-installed providers from the list of devel providers to install - because now the pre-installed provider dependencies are installed simply as "required" dependencies. As a result - simple `pip install -e .` should now result in fully working airflow installation - without all the devel goodies and without celery and kubernetes dependencies, but fully usable for sequential and local executor cases. Also reviewed and updated the comments in hatch_build.py to better reflect the purpose and behaviour of some of the methods there. * Update hatch_build.py Co-authored-by: Ephraim Anierobi <splendidzig...@gmail.com> --------- Co-authored-by: Ephraim Anierobi <splendidzig...@gmail.com> --- airflow_pre_installed_providers.txt | 9 --- hatch_build.py | 150 ++++++++++++++++++++++++++---------- 2 files changed, 110 insertions(+), 49 deletions(-) diff --git a/airflow_pre_installed_providers.txt b/airflow_pre_installed_providers.txt deleted file mode 100644 index e717ea696e..0000000000 --- a/airflow_pre_installed_providers.txt +++ /dev/null @@ -1,9 +0,0 @@ -# List of all the providers that are pre-installed when you run `pip install apache-airflow` without extras -common.io -common.sql -fab>=1.0.2rc1 -ftp -http -imap -smtp -sqlite diff --git a/hatch_build.py b/hatch_build.py index 69b6da7a1c..532ee7d3e3 100644 --- a/hatch_build.py +++ b/hatch_build.py @@ -273,8 +273,6 @@ DEVEL_EXTRAS: dict[str, list[str]] = { "devel": [ "apache-airflow[celery]", "apache-airflow[cncf-kubernetes]", - "apache-airflow[common-io]", - "apache-airflow[common-sql]", "apache-airflow[devel-debuggers]", "apache-airflow[devel-devscripts]", "apache-airflow[devel-duckdb]", @@ -282,11 +280,6 @@ DEVEL_EXTRAS: dict[str, list[str]] = { "apache-airflow[devel-sentry]", "apache-airflow[devel-static-checks]", "apache-airflow[devel-tests]", - "apache-airflow[fab]", - "apache-airflow[ftp]", - "apache-airflow[http]", - "apache-airflow[imap]", - "apache-airflow[sqlite]", ], "devel-all-dbs": [ "apache-airflow[apache-cassandra]", @@ -550,11 +543,35 @@ ALL_DYNAMIC_EXTRAS: list[str] = sorted( def get_provider_id(provider_spec: str) -> str: - # in case provider_spec is "<provider_id>=<version>" - return provider_spec.split(">=")[0] + """ + Extract provider id from provider specification. + + :param provider_spec: provider specification can be in the form of the "PROVIDER_ID" or + "apache-airflow-providers-PROVIDER", optionally followed by ">=VERSION". + + :return: short provider_id with `.` instead of `-` in case of `apache` and other providers with + `-` in the name. + """ + _provider_id = provider_spec.split(">=")[0] + if _provider_id.startswith("apache-airflow-providers-"): + _provider_id = _provider_id.replace("apache-airflow-providers-", "").replace("-", ".") + return _provider_id def get_provider_requirement(provider_spec: str) -> str: + """ + Convert provider specification with provider_id to provider requirement. + + The requirement can be used when constructing dependencies. It automatically adds pre-release specifier + in case we are building pre-release version of Airflow. This way we can handle the case when airflow + depends on specific version of the provider that has not yet been released - then we release the + pre-release version of provider to PyPI and airflow built in CI, or Airflow pre-release version will + automatically depend on that pre-release version of the provider. + + :param provider_spec: provider specification can be in the form of the "PROVIDER_ID" optionally followed + by >=VERSION. + :return: requirement for the provider that can be used as dependency. + """ if ">=" in provider_spec: # we cannot import `airflow` here directly as it would pull re2 and a number of airflow # dependencies so we need to read airflow version by matching a regexp @@ -577,29 +594,48 @@ def get_provider_requirement(provider_spec: str) -> str: return f"apache-airflow-providers-{provider_spec.replace('.', '-')}" -# if providers are ready, we can preinstall them -PREINSTALLED_PROVIDERS = [ +# if providers are ready, we build provider requirements for them +PREINSTALLED_PROVIDER_REQUIREMENTS = [ get_provider_requirement(provider_spec) for provider_spec in PRE_INSTALLED_PROVIDERS if PROVIDER_DEPENDENCIES[get_provider_id(provider_spec)]["state"] == "ready" ] -# if provider is in not-ready or pre-release, we need to install its dependencies -# however we need to skip apache-airflow itself and potentially any providers that are -PREINSTALLED_NOT_READY_DEPS = [] +# Here we keep all pre-installed provider dependencies, so that we can add them as requirements in +# editable build to make sure that all dependencies are installed when we install Airflow in editable mode +# We need to skip apache-airflow min-versions and flag (exit) when pre-installed provider has +# dependency to another provider +ALL_PREINSTALLED_PROVIDER_DEPS: list[str] = [] + +# We very rarely - and only for the time when we plan to release a new preinstalled provider in next release +# we have the preinstalled provider that is in non-ready state. +# If provider is in not-ready state, we need to install its dependencies in editable mode as well as +# when we are building the wheel in CI. In pre-release branch we should never have a non-ready provider +# added, so this will only be used in main branch for CI builds. +PREINSTALLED_NOT_READY_PROVIDER_DEPS: list[str] = [] + for provider_spec in PRE_INSTALLED_PROVIDERS: provider_id = get_provider_id(provider_spec) - if PROVIDER_DEPENDENCIES[provider_id]["state"] not in ["ready", "suspended", "removed"]: - for dependency in PROVIDER_DEPENDENCIES[provider_id]["deps"]: - if dependency.startswith("apache-airflow-providers"): - msg = ( - f"The provider {provider_id} is pre-installed and it has as dependency " - f"to another provider {dependency}. This is not allowed. Pre-installed" - f"providers should only have 'apache-airflow' and regular dependencies." - ) - raise SystemExit(msg) - if not dependency.startswith("apache-airflow"): - PREINSTALLED_NOT_READY_DEPS.append(dependency) + for dependency in PROVIDER_DEPENDENCIES[provider_id]["deps"]: + if ( + dependency.startswith("apache-airflow-providers") + and get_provider_id(dependency) not in PRE_INSTALLED_PROVIDERS + ): + msg = ( + f"The provider {provider_id} is pre-installed and it has a dependency " + f"to another provider {dependency} which is not preinstalled. This is not allowed. " + f"Pre-installed providers should only have 'apache-airflow', other preinstalled providers" + f"and regular non-airflow dependencies." + ) + raise SystemExit(msg) + if not dependency.startswith("apache-airflow"): + if PROVIDER_DEPENDENCIES[provider_id]["state"] not in ["suspended", "removed"]: + ALL_PREINSTALLED_PROVIDER_DEPS.append(dependency) + if PROVIDER_DEPENDENCIES[provider_id]["state"] in ["not-ready"]: + PREINSTALLED_NOT_READY_PROVIDER_DEPS.append(dependency) + +ALL_PREINSTALLED_PROVIDER_DEPS = sorted(set(ALL_PREINSTALLED_PROVIDER_DEPS)) +PREINSTALLED_NOT_READY_PROVIDER_DEPS = sorted(set(PREINSTALLED_NOT_READY_PROVIDER_DEPS)) class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]): @@ -608,7 +644,6 @@ class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]): # Note that this name of the plugin MUST be `custom` - as long as we use it from custom # hatch_build.py file and not from external plugin. See note in the: # https://hatch.pypa.io/latest/plugins/build-hook/custom/#example - # PLUGIN_NAME = "custom" def clean(self, directory: str, versions: Iterable[str]) -> None: @@ -689,16 +724,29 @@ GENERATED_DEPENDENCIES_START = "# START OF GENERATED DEPENDENCIES" GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES" -def convert_to_extra_dependency(dependency: str) -> str: +def convert_to_extra_dependency(provider_requirement: str) -> str: + """ + Convert provider specification to extra dependency. + + :param provider_requirement: requirement of the provider in the form of apache-airflow-provider-*, + optionally followed by >=VERSION. + :return: extra dependency in the form of apache-airflow[extra] + """ # if there is version in dependency - remove it as we do not need it in extra specification # for editable installation - if ">=" in dependency: - dependency = dependency.split(">=")[0] - extra = dependency.replace("apache-airflow-providers-", "").replace("-", "_").replace(".", "_") + if ">=" in provider_requirement: + provider_requirement = provider_requirement.split(">=")[0] + extra = provider_requirement.replace("apache-airflow-providers-", "").replace("-", "_").replace(".", "_") return f"apache-airflow[{extra}]" def get_python_exclusion(excluded_python_versions: list[str]): + """ + Produce the Python exclusion that should be used - converted from the list of python versions. + + :param excluded_python_versions: list of python versions to exclude the dependency for. + :return: python version exclusion string that can be added to dependency in specification. + """ exclusion = "" if excluded_python_versions: separator = ";" @@ -709,6 +757,12 @@ def get_python_exclusion(excluded_python_versions: list[str]): def skip_for_editable_build(excluded_python_versions: list[str]) -> bool: + """ + Whether the dependency should be skipped for editable build for current python version. + + :param excluded_python_versions: list of excluded python versions. + :return: True if the dependency should be skipped for editable build for the current python version. + """ current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}" if current_python_version in excluded_python_versions: return True @@ -716,7 +770,23 @@ def skip_for_editable_build(excluded_python_versions: list[str]) -> bool: class CustomBuildHook(BuildHookInterface[BuilderConfig]): - """Custom build hook for Airflow - remove devel extras and adds preinstalled providers.""" + """ + Custom build hook for Airflow. + + Generates required and optional dependencies depends on the build `version`. + + - standard: Generates all dependencies for the standard (.whl) package: + * devel and doc extras not included + * core extras and "production" bundle extras included + * provider optional dependencies resolve to "apache-airflow-providers-{provider}" + * pre-installed providers added as required dependencies + + - editable: Generates all dependencies for the editable installation: + * devel and doc extras (including devel bundle extras are included) + * core extras and "production" bundles included + * provider optional dependencies resolve to provider dependencies including devel dependencies + * pre-installed providers not included - their dependencies included in devel extras + """ def __init__(self, *args: Any, **kwargs: Any) -> None: # Stores all dependencies that that any of the airflow extras (including devel) use @@ -734,6 +804,9 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]): Initialize hook immediately before each build. Any modifications to the build data will be seen by the build target. + + :param version: "standard" or "editable" build. + :param build_data: build data dictionary. """ self._process_all_built_in_extras(version) self._process_all_provider_extras(version) @@ -759,10 +832,10 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]): if version == "standard": # Inject preinstalled providers into the dependencies for standard packages - for provider in PREINSTALLED_PROVIDERS: - self._dependencies.append(provider) - for not_ready_provider_dependency in PREINSTALLED_NOT_READY_DEPS: - self._dependencies.append(not_ready_provider_dependency) + self._dependencies.extend(PREINSTALLED_PROVIDER_REQUIREMENTS) + self._dependencies.extend(PREINSTALLED_NOT_READY_PROVIDER_DEPS) + else: + self._dependencies.extend(ALL_PREINSTALLED_PROVIDER_DEPS) # with hatchling, we can modify dependencies dynamically by modifying the build_data build_data["dependencies"] = self._dependencies @@ -777,11 +850,10 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]): Add devel_ci_dependencies. Adds all external dependencies which are not apache-airflow deps to the list of dependencies - that are going to be added to `devel-ci` extra. + that are going to be added to `devel-ci` extra. Optionally exclude dependencies for specific + python versions. :param deps: list of dependencies to add - :param version: "standard" or "editable" build. - :param excluded_python_versions: List of python versions to exclude :param python_exclusion: Python version exclusion string. """ for dep in deps: @@ -796,7 +868,6 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]): and providers for wheel builds. :param version: "standard" or "editable" build. - :return: """ for dependency_id in PROVIDER_DEPENDENCIES.keys(): if PROVIDER_DEPENDENCIES[dependency_id]["state"] != "ready": @@ -839,7 +910,6 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]): used to produce "all" extra. :param version: "standard" or "editable" build. - :return: """ for dict, _ in ALL_DYNAMIC_EXTRA_DICTS: for extra, deps in dict.items():