This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new d409b8be96 Improve editable airflow installation by adding 
preinstalled deps (#38764)
d409b8be96 is described below

commit d409b8be966b51207e10129e7709e943a5dc7ac3
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Mon Apr 8 18:56:17 2024 +0200

    Improve editable airflow installation by adding preinstalled deps (#38764)
    
    * Improve editable airflow installation by adding preinstalled deps
    
    While preparing for presentation about modernizing packaging setup
    I found out that we can slightly improve editable installation of
    airflow. So far we required a "devel" installation in order to make
    a working editable installation of airflow (because the devel extra
    was installing dependencies for all the pre-installed providers).
    
    However that required to synchronize the list of providers installed
    in `devel` dependency with the list of preinstalled providers, as
    well as it made `pip install -e .` resulting with a non-working
    version of Airflow (because it requires `fab` provider dependencies
    to start airflow webserver).
    
    This PR improves it - in editable mode, instead of adding the
    pre-installed providers, we add their dependencies. This way we can
    remove the pre-installed providers from the list of devel providers
    to install - because now the pre-installed provider dependencies
    are installed simply as "required" dependencies.
    
    As a result - simple `pip install -e .` should now result in fully
    working airflow installation - without all the devel goodies and
    without celery and kubernetes dependencies, but fully usable for
    sequential and local executor cases.
    
    Also reviewed and updated the comments in hatch_build.py to better
    reflect the purpose and behaviour of some of the methods there.
    
    * Update hatch_build.py
    
    Co-authored-by: Ephraim Anierobi <splendidzig...@gmail.com>
    
    ---------
    
    Co-authored-by: Ephraim Anierobi <splendidzig...@gmail.com>
---
 airflow_pre_installed_providers.txt |   9 ---
 hatch_build.py                      | 150 ++++++++++++++++++++++++++----------
 2 files changed, 110 insertions(+), 49 deletions(-)

diff --git a/airflow_pre_installed_providers.txt 
b/airflow_pre_installed_providers.txt
deleted file mode 100644
index e717ea696e..0000000000
--- a/airflow_pre_installed_providers.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# List of all the providers that are pre-installed when you run `pip install 
apache-airflow` without extras
-common.io
-common.sql
-fab>=1.0.2rc1
-ftp
-http
-imap
-smtp
-sqlite
diff --git a/hatch_build.py b/hatch_build.py
index 69b6da7a1c..532ee7d3e3 100644
--- a/hatch_build.py
+++ b/hatch_build.py
@@ -273,8 +273,6 @@ DEVEL_EXTRAS: dict[str, list[str]] = {
     "devel": [
         "apache-airflow[celery]",
         "apache-airflow[cncf-kubernetes]",
-        "apache-airflow[common-io]",
-        "apache-airflow[common-sql]",
         "apache-airflow[devel-debuggers]",
         "apache-airflow[devel-devscripts]",
         "apache-airflow[devel-duckdb]",
@@ -282,11 +280,6 @@ DEVEL_EXTRAS: dict[str, list[str]] = {
         "apache-airflow[devel-sentry]",
         "apache-airflow[devel-static-checks]",
         "apache-airflow[devel-tests]",
-        "apache-airflow[fab]",
-        "apache-airflow[ftp]",
-        "apache-airflow[http]",
-        "apache-airflow[imap]",
-        "apache-airflow[sqlite]",
     ],
     "devel-all-dbs": [
         "apache-airflow[apache-cassandra]",
@@ -550,11 +543,35 @@ ALL_DYNAMIC_EXTRAS: list[str] = sorted(
 
 
 def get_provider_id(provider_spec: str) -> str:
-    # in case provider_spec is "<provider_id>=<version>"
-    return provider_spec.split(">=")[0]
+    """
+    Extract provider id from provider specification.
+
+    :param provider_spec: provider specification can be in the form of the 
"PROVIDER_ID" or
+           "apache-airflow-providers-PROVIDER", optionally followed by 
">=VERSION".
+
+    :return: short provider_id with `.` instead of `-` in case of `apache` and 
other providers with
+             `-` in the name.
+    """
+    _provider_id = provider_spec.split(">=")[0]
+    if _provider_id.startswith("apache-airflow-providers-"):
+        _provider_id = _provider_id.replace("apache-airflow-providers-", 
"").replace("-", ".")
+    return _provider_id
 
 
 def get_provider_requirement(provider_spec: str) -> str:
+    """
+    Convert provider specification with provider_id to provider requirement.
+
+    The requirement can be used when constructing dependencies. It 
automatically adds pre-release specifier
+    in case we are building pre-release version of Airflow. This way we can 
handle the case when airflow
+    depends on specific version of the provider that has not yet been released 
- then we release the
+    pre-release version of provider to PyPI and airflow built in CI, or 
Airflow pre-release version will
+    automatically depend on that pre-release version of the provider.
+
+    :param provider_spec: provider specification can be in the form of the 
"PROVIDER_ID" optionally followed
+       by >=VERSION.
+    :return: requirement for the provider that can be used as dependency.
+    """
     if ">=" in provider_spec:
         # we cannot import `airflow` here directly as it would pull re2 and a 
number of airflow
         # dependencies so we need to read airflow version by matching a regexp
@@ -577,29 +594,48 @@ def get_provider_requirement(provider_spec: str) -> str:
         return f"apache-airflow-providers-{provider_spec.replace('.', '-')}"
 
 
-# if providers are ready, we can preinstall them
-PREINSTALLED_PROVIDERS = [
+# if providers are ready, we build provider requirements for them
+PREINSTALLED_PROVIDER_REQUIREMENTS = [
     get_provider_requirement(provider_spec)
     for provider_spec in PRE_INSTALLED_PROVIDERS
     if PROVIDER_DEPENDENCIES[get_provider_id(provider_spec)]["state"] == 
"ready"
 ]
 
-# if provider is in not-ready or pre-release, we need to install its 
dependencies
-# however we need to skip apache-airflow itself and potentially any providers 
that are
-PREINSTALLED_NOT_READY_DEPS = []
+# Here we keep all pre-installed provider dependencies, so that we can add 
them as requirements in
+# editable build to make sure that all dependencies are installed when we 
install Airflow in editable mode
+# We need to skip apache-airflow min-versions and flag (exit) when 
pre-installed provider has
+# dependency to another provider
+ALL_PREINSTALLED_PROVIDER_DEPS: list[str] = []
+
+# We very rarely - and only for the time when we plan to release a new 
preinstalled provider in next release
+# we have the preinstalled provider that is in non-ready state.
+# If provider is in not-ready state, we need to install its dependencies in 
editable mode as well as
+# when we are building the wheel in CI. In pre-release branch we should never 
have a non-ready provider
+# added, so this will only be used in main branch for CI builds.
+PREINSTALLED_NOT_READY_PROVIDER_DEPS: list[str] = []
+
 for provider_spec in PRE_INSTALLED_PROVIDERS:
     provider_id = get_provider_id(provider_spec)
-    if PROVIDER_DEPENDENCIES[provider_id]["state"] not in ["ready", 
"suspended", "removed"]:
-        for dependency in PROVIDER_DEPENDENCIES[provider_id]["deps"]:
-            if dependency.startswith("apache-airflow-providers"):
-                msg = (
-                    f"The provider {provider_id} is pre-installed and it has 
as dependency "
-                    f"to another provider {dependency}. This is not allowed. 
Pre-installed"
-                    f"providers should only have 'apache-airflow' and regular 
dependencies."
-                )
-                raise SystemExit(msg)
-            if not dependency.startswith("apache-airflow"):
-                PREINSTALLED_NOT_READY_DEPS.append(dependency)
+    for dependency in PROVIDER_DEPENDENCIES[provider_id]["deps"]:
+        if (
+            dependency.startswith("apache-airflow-providers")
+            and get_provider_id(dependency) not in PRE_INSTALLED_PROVIDERS
+        ):
+            msg = (
+                f"The provider {provider_id} is pre-installed and it has a 
dependency "
+                f"to another provider {dependency} which is not preinstalled. 
This is not allowed. "
+                f"Pre-installed providers should only have 'apache-airflow', 
other preinstalled providers"
+                f"and regular non-airflow dependencies."
+            )
+            raise SystemExit(msg)
+        if not dependency.startswith("apache-airflow"):
+            if PROVIDER_DEPENDENCIES[provider_id]["state"] not in 
["suspended", "removed"]:
+                ALL_PREINSTALLED_PROVIDER_DEPS.append(dependency)
+                if PROVIDER_DEPENDENCIES[provider_id]["state"] in 
["not-ready"]:
+                    PREINSTALLED_NOT_READY_PROVIDER_DEPS.append(dependency)
+
+ALL_PREINSTALLED_PROVIDER_DEPS = sorted(set(ALL_PREINSTALLED_PROVIDER_DEPS))
+PREINSTALLED_NOT_READY_PROVIDER_DEPS = 
sorted(set(PREINSTALLED_NOT_READY_PROVIDER_DEPS))
 
 
 class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]):
@@ -608,7 +644,6 @@ class CustomBuild(BuilderInterface[BuilderConfig, 
PluginManager]):
     # Note that this name of the plugin MUST be `custom` - as long as we use 
it from custom
     # hatch_build.py file and not from external plugin. See note in the:
     # https://hatch.pypa.io/latest/plugins/build-hook/custom/#example
-    #
     PLUGIN_NAME = "custom"
 
     def clean(self, directory: str, versions: Iterable[str]) -> None:
@@ -689,16 +724,29 @@ GENERATED_DEPENDENCIES_START = "# START OF GENERATED 
DEPENDENCIES"
 GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES"
 
 
-def convert_to_extra_dependency(dependency: str) -> str:
+def convert_to_extra_dependency(provider_requirement: str) -> str:
+    """
+    Convert provider specification to extra dependency.
+
+    :param provider_requirement: requirement of the provider in the form of 
apache-airflow-provider-*,
+        optionally followed by >=VERSION.
+    :return: extra dependency in the form of apache-airflow[extra]
+    """
     # if there is version in dependency - remove it as we do not need it in 
extra specification
     # for editable installation
-    if ">=" in dependency:
-        dependency = dependency.split(">=")[0]
-    extra = dependency.replace("apache-airflow-providers-", "").replace("-", 
"_").replace(".", "_")
+    if ">=" in provider_requirement:
+        provider_requirement = provider_requirement.split(">=")[0]
+    extra = provider_requirement.replace("apache-airflow-providers-", 
"").replace("-", "_").replace(".", "_")
     return f"apache-airflow[{extra}]"
 
 
 def get_python_exclusion(excluded_python_versions: list[str]):
+    """
+    Produce the Python exclusion that should be used - converted from the list 
of python versions.
+
+    :param excluded_python_versions: list of python versions to exclude the 
dependency for.
+    :return: python version exclusion string that can be added to dependency 
in specification.
+    """
     exclusion = ""
     if excluded_python_versions:
         separator = ";"
@@ -709,6 +757,12 @@ def get_python_exclusion(excluded_python_versions: 
list[str]):
 
 
 def skip_for_editable_build(excluded_python_versions: list[str]) -> bool:
+    """
+    Whether the dependency should be skipped for editable build for current 
python version.
+
+    :param excluded_python_versions: list of excluded python versions.
+    :return: True if the dependency should be skipped for editable build for 
the current python version.
+    """
     current_python_version = 
f"{sys.version_info.major}.{sys.version_info.minor}"
     if current_python_version in excluded_python_versions:
         return True
@@ -716,7 +770,23 @@ def skip_for_editable_build(excluded_python_versions: 
list[str]) -> bool:
 
 
 class CustomBuildHook(BuildHookInterface[BuilderConfig]):
-    """Custom build hook for Airflow - remove devel extras and adds 
preinstalled providers."""
+    """
+    Custom build hook for Airflow.
+
+    Generates required and optional dependencies depends on the build 
`version`.
+
+    - standard: Generates all dependencies for the standard (.whl) package:
+       * devel and doc extras not included
+       * core extras and "production" bundle extras included
+       * provider optional dependencies resolve to 
"apache-airflow-providers-{provider}"
+       * pre-installed providers added as required dependencies
+
+    - editable: Generates all dependencies for the editable installation:
+       * devel and doc extras (including devel bundle extras are included)
+       * core extras and "production" bundles included
+       * provider optional dependencies resolve to provider dependencies 
including devel dependencies
+       * pre-installed providers not included - their dependencies included in 
devel extras
+    """
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         # Stores all dependencies that that any of the airflow extras 
(including devel) use
@@ -734,6 +804,9 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]):
         Initialize hook immediately before each build.
 
         Any modifications to the build data will be seen by the build target.
+
+        :param version: "standard" or "editable" build.
+        :param build_data: build data dictionary.
         """
         self._process_all_built_in_extras(version)
         self._process_all_provider_extras(version)
@@ -759,10 +832,10 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]):
 
         if version == "standard":
             # Inject preinstalled providers into the dependencies for standard 
packages
-            for provider in PREINSTALLED_PROVIDERS:
-                self._dependencies.append(provider)
-            for not_ready_provider_dependency in PREINSTALLED_NOT_READY_DEPS:
-                self._dependencies.append(not_ready_provider_dependency)
+            self._dependencies.extend(PREINSTALLED_PROVIDER_REQUIREMENTS)
+            self._dependencies.extend(PREINSTALLED_NOT_READY_PROVIDER_DEPS)
+        else:
+            self._dependencies.extend(ALL_PREINSTALLED_PROVIDER_DEPS)
 
         # with hatchling, we can modify dependencies dynamically by modifying 
the build_data
         build_data["dependencies"] = self._dependencies
@@ -777,11 +850,10 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]):
         Add devel_ci_dependencies.
 
         Adds all external dependencies which are not apache-airflow deps to 
the list of dependencies
-        that are going to be added to `devel-ci` extra.
+        that are going to be added to `devel-ci` extra. Optionally exclude 
dependencies for specific
+        python versions.
 
         :param deps: list of dependencies to add
-        :param version: "standard" or "editable" build.
-        :param excluded_python_versions: List of python versions to exclude
         :param python_exclusion: Python version exclusion string.
         """
         for dep in deps:
@@ -796,7 +868,6 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]):
         and providers for wheel builds.
 
         :param version: "standard" or "editable" build.
-        :return:
         """
         for dependency_id in PROVIDER_DEPENDENCIES.keys():
             if PROVIDER_DEPENDENCIES[dependency_id]["state"] != "ready":
@@ -839,7 +910,6 @@ class CustomBuildHook(BuildHookInterface[BuilderConfig]):
         used to produce "all" extra.
 
         :param version: "standard" or "editable" build.
-        :return:
         """
         for dict, _ in ALL_DYNAMIC_EXTRA_DICTS:
             for extra, deps in dict.items():

Reply via email to