ashb commented on a change in pull request #13439: URL: https://github.com/apache/airflow/pull/13439#discussion_r551397295
########## File path: setup.py ########## @@ -780,76 +826,135 @@ def get_provider_package_from_package_id(package_id: str): class AirflowDistribution(Distribution): - """setuptools.Distribution subclass with Airflow specific behaviour""" + """ + The setuptools.Distribution subclass with Airflow specific behaviour + + The reason for pylint: disable=signature-differs of parse_config_files is explained here: + https://github.com/PyCQA/pylint/issues/3737 + + """ - # https://github.com/PyCQA/pylint/issues/3737 def parse_config_files(self, *args, **kwargs): # pylint: disable=signature-differs """ Ensure that when we have been asked to install providers from sources - that we don't *also* try to install those providers from PyPI + that we don't *also* try to install those providers from PyPI. + Also we should make sure that in this case we copy provider.yaml files so that + Providers manager can find package information. """ super().parse_config_files(*args, **kwargs) - if os.getenv('INSTALL_PROVIDERS_FROM_SOURCES') == 'true': + if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == 'true': self.install_requires = [ # noqa pylint: disable=attribute-defined-outside-init req for req in self.install_requires if not req.startswith('apache-airflow-providers-') ] + provider_yaml_files = glob.glob("airflow/providers/**/provider.yaml", recursive=True) + for provider_yaml_file in provider_yaml_files: + provider_relative_path = relpath(provider_yaml_file, os.path.join(my_dir, "airflow")) + self.package_data['airflow'].append(provider_relative_path) else: self.install_requires.extend( [get_provider_package_from_package_id(package_id) for package_id in PREINSTALLED_PROVIDERS] ) -def add_provider_packages_to_requirements(extra_with_providers: str, providers: List[str]): +def add_provider_packages_to_extras_requirements(extra: str, providers: List[str]) -> None: """ - Adds provider packages to requirements + Adds provider packages to requirements of extra. 
- :param extra_with_providers: Name of the extra to add providers to - :param providers: list of provider names + :param extra: Name of the extra to add providers to + :param providers: list of provider ids """ - EXTRAS_WITH_PROVIDERS.add(extra_with_providers) - EXTRAS_REQUIREMENTS[extra_with_providers].extend( + EXTRAS_WITH_PROVIDERS.add(extra) + EXTRAS_REQUIREMENTS[extra].extend( [get_provider_package_from_package_id(package_name) for package_name in providers] ) -def add_all_provider_packages(): +def add_all_provider_packages() -> None: """ - In case of regular installation (when INSTALL_PROVIDERS_FROM_SOURCES is false), we should - add extra dependencies to Airflow - to get the providers automatically installed when - those extras are installed. + In case of regular installation (providers installed from packages), we should add extra dependencies to + Airflow - to get the providers automatically installed when those extras are installed. + + For providers installed from sources we skip that step. That helps to test and install airflow with + all packages in CI - for example when new providers are added, otherwise the installation would fail + as the new provider is not yet in PyPI. 
""" for provider in ALL_PROVIDERS: - add_provider_packages_to_requirements(provider, [provider]) - add_provider_packages_to_requirements("all", ALL_PROVIDERS) - add_provider_packages_to_requirements("devel_ci", ALL_PROVIDERS) - add_provider_packages_to_requirements("devel_all", ALL_PROVIDERS) - add_provider_packages_to_requirements("all_dbs", ALL_DB_PROVIDERS) - add_provider_packages_to_requirements("devel_hadoop", ["apache.hdfs", "apache.hive", "presto"]) + add_provider_packages_to_extras_requirements(provider, [provider]) + add_provider_packages_to_extras_requirements("all", ALL_PROVIDERS) + add_provider_packages_to_extras_requirements("devel_ci", ALL_PROVIDERS) + add_provider_packages_to_extras_requirements("devel_all", ALL_PROVIDERS) + add_provider_packages_to_extras_requirements("all_dbs", ALL_DB_PROVIDERS) + add_provider_packages_to_extras_requirements("devel_hadoop", ["apache.hdfs", "apache.hive", "presto"]) + + +class Develop(develop_orig): + """Forces removal of providers in editable mode.""" + + def run(self): + self.announce('Installing in editable mode. 
Uninstalling provider packages!', level=log.INFO) + # We need to run "python3 -m pip" because it might be that older PIP binary is in the path + # And it results with an error when running pip directly (cannot import pip module) + # also PIP does not have a stable API so we have to run subprocesses ¯\_(ツ)_/¯ + try: + installed_packages = ( + subprocess.check_output(["python3", "-m", "pip", "freeze"]).decode().splitlines() + ) + airflow_provider_packages = [ + package_line.split("=")[0] + for package_line in installed_packages + if package_line.startswith("apache-airflow-providers") + ] + self.announce(f'Uninstalling ${airflow_provider_packages}!', level=log.INFO) + subprocess.check_call(["python3", "-m", "pip", "uninstall", "--yes", *airflow_provider_packages]) + except subprocess.CalledProcessError as e: + self.announce(f'Error when uninstalling airflow provider packages: {e}!', level=log.WARN) + super().run() + + +class Install(install_orig): + """Forces installation of providers from sources in editable mode.""" + def run(self): + self.announce('Standard installation. Providers are installed from packages', level=log.INFO) + super().run() -def do_setup(): - """Perform the Airflow package setup.""" + +def do_setup() -> None: + """ + Perform the Airflow package setup. + Most values come from setup.cfg, only the dynamically calculated ones are passed to setup Review comment: ```suggestion Perform the Airflow package setup. Most values come from setup.cfg, only the dynamically calculated ones are passed to setup ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org
