This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new e11a111a50 Nicer pyproject.toml dependency generation (#37114)
e11a111a50 is described below

commit e11a111a50348a18cbe0b8b0426dd7796b1aaf31
Author: Jarek Potiuk <[email protected]>
AuthorDate: Wed Jan 31 12:54:50 2024 +0100

    Nicer pyproject.toml dependency generation (#37114)
    
    The pyproject.toml generated dependencies from providers are now
    generated in a bit more streamlined way:
    
    * the "empty" dependencies are now single-line empty arrays, to make
      GitHub's renderer of pyproject.toml happier (especially when showing
      a diff in preview)
    
    * instead of calculating hashes and preventing generation of
      pyproject.toml, we now always generate it when the pre-commit is run.
      This is possible because generation is stable and always produces the
      same results from the same input, so we can safely regenerate the file
      in CI with `--all-files` and the file will not be changed. This way
      we avoid hash collisions when we have parallel changes coming to
      different providers.
---
 pyproject.toml                                     | 19 ++----
 .../pre_commit_update_providers_dependencies.py    | 76 +++++++++-------------
 2 files changed, 37 insertions(+), 58 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e4c7002c2b..cb05b9735c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -521,7 +521,6 @@ winrm = [
 # If you want to modify these - modify the corresponding provider.yaml instead.
 
#############################################################################################################
 # START OF GENERATED DEPENDENCIES
-# Hash of dependencies: ad91a0758ca9b408679bd3ea3ec22c66
 airbyte = [ # source: airflow/providers/airbyte/provider.yaml
   "apache-airflow[http]",
 ]
@@ -596,8 +595,7 @@ apache-livy = [ # source: 
airflow/providers/apache/livy/provider.yaml
   "apache-airflow[http]",
   "asgiref",
 ]
-apache-pig = [ # source: airflow/providers/apache/pig/provider.yaml
-]
+apache-pig = [] # source: airflow/providers/apache/pig/provider.yaml
 apache-pinot = [ # source: airflow/providers/apache/pinot/provider.yaml
   "apache-airflow[common_sql]",
   "pinotdb>0.4.7",
@@ -638,8 +636,7 @@ cncf-kubernetes = [ # source: 
airflow/providers/cncf/kubernetes/provider.yaml
 cohere = [ # source: airflow/providers/cohere/provider.yaml
   "cohere>=4.37",
 ]
-common-io = [ # source: airflow/providers/common/io/provider.yaml
-]
+common-io = [] # source: airflow/providers/common/io/provider.yaml
 common-sql = [ # source: airflow/providers/common/sql/provider.yaml
   "sqlparse>=0.4.2",
 ]
@@ -687,8 +684,7 @@ fab = [ # source: airflow/providers/fab/provider.yaml
 facebook = [ # source: airflow/providers/facebook/provider.yaml
   "facebook-business>=6.0.2",
 ]
-ftp = [ # source: airflow/providers/ftp/provider.yaml
-]
+ftp = [] # source: airflow/providers/ftp/provider.yaml
 github = [ # source: airflow/providers/github/provider.yaml
   "PyGithub!=1.58",
 ]
@@ -766,8 +762,7 @@ http = [ # source: airflow/providers/http/provider.yaml
   "requests>=2.26.0",
   "requests_toolbelt",
 ]
-imap = [ # source: airflow/providers/imap/provider.yaml
-]
+imap = [] # source: airflow/providers/imap/provider.yaml
 influxdb = [ # source: airflow/providers/influxdb/provider.yaml
   "influxdb-client>=1.19.0",
   "requests>=2.26.0",
@@ -835,8 +830,7 @@ odbc = [ # source: airflow/providers/odbc/provider.yaml
 openai = [ # source: airflow/providers/openai/provider.yaml
   "openai[datalib]>=1.0",
 ]
-openfaas = [ # source: airflow/providers/openfaas/provider.yaml
-]
+openfaas = [] # source: airflow/providers/openfaas/provider.yaml
 openlineage = [ # source: airflow/providers/openlineage/provider.yaml
   "apache-airflow[common_sql]",
   "attrs>=22.2",
@@ -904,8 +898,7 @@ slack = [ # source: airflow/providers/slack/provider.yaml
   "apache-airflow[common_sql]",
   "slack_sdk>=3.19.0",
 ]
-smtp = [ # source: airflow/providers/smtp/provider.yaml
-]
+smtp = [] # source: airflow/providers/smtp/provider.yaml
 snowflake = [ # source: airflow/providers/snowflake/provider.yaml
   "apache-airflow[common_sql]",
   "snowflake-connector-python>=2.7.8",
diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py 
b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py
index ca502e0d0b..dae81af35b 100755
--- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py
+++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py
@@ -17,10 +17,8 @@
 # under the License.
 from __future__ import annotations
 
-import hashlib
 import json
 import os
-import re
 import sys
 from ast import Import, ImportFrom, NodeVisitor, parse
 from collections import defaultdict
@@ -238,20 +236,25 @@ def generate_dependencies(
     for dependency, dependency_info in dependencies.items():
         if dependency_info["state"] in ["suspended", "removed"]:
             continue
+        deps = dependency_info["deps"]
+        deps = [dep for dep in deps if not dep.startswith("apache-airflow>=")]
+        devel_deps = dependency_info.get("devel-deps")
+        if not deps and not devel_deps:
+            result_content.append(
+                f"{normalize_extra(dependency)} = [] "
+                f"# source: airflow/providers/{dependency.replace('.', 
'/')}/provider.yaml"
+            )
+            continue
         result_content.append(
             f"{normalize_extra(dependency)} = "
             f"[ # source: airflow/providers/{dependency.replace('.', 
'/')}/provider.yaml"
         )
-        deps = dependency_info["deps"]
         if not isinstance(deps, list):
             raise TypeError(f"Wrong type of 'deps' {deps} for {dependency} in 
{DEPENDENCIES_JSON_FILE_PATH}")
         for dep in deps:
             if dep.startswith("apache-airflow-providers-"):
                 dep = convert_to_extra_dependency(dep)
-            elif dep.startswith("apache-airflow>="):
-                continue
             result_content.append(f'  
"{dep}{get_python_exclusion(dependency_info)}",')
-        devel_deps = dependency_info.get("devel-deps")
         if devel_deps:
             result_content.append(f"  # Devel dependencies for the 
{dependency} provider")
             for dep in devel_deps:
@@ -284,7 +287,7 @@ def get_dependency_type(dependency_type: str) -> 
ParsedDependencyTypes | None:
     return None
 
 
-def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]], 
dependencies_hash: str):
+def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]) 
-> bool:
     file_content = PYPROJECT_TOML_FILE_PATH.read_text()
     result_content: list[str] = []
     copying = True
@@ -295,7 +298,6 @@ def update_pyproject_toml(dependencies: dict[str, dict[str, 
list[str] | str]], d
             result_content.append(line)
         if line.strip().startswith(GENERATED_DEPENDENCIES_START):
             copying = False
-            result_content.append(f"# Hash of dependencies: 
{dependencies_hash}")
             generate_dependencies(result_content, dependencies)
         elif line.strip().startswith(GENERATED_DEPENDENCIES_END):
             copying = True
@@ -320,25 +322,13 @@ def update_pyproject_toml(dependencies: dict[str, 
dict[str, list[str] | str]], d
             if line.strip().endswith(" = ["):
                 FOUND_EXTRAS[current_type].append(line.split(" = 
[")[0].strip())
         line_count += 1
-    PYPROJECT_TOML_FILE_PATH.write_text("\n".join(result_content) + "\n")
-
-
-def calculate_my_hash():
-    my_file = MY_FILE.resolve()
-    hash_md5 = hashlib.md5()
-    hash_md5.update(my_file.read_bytes())
-    return hash_md5.hexdigest()
-
-
-def calculate_dependencies_hash(dependencies: str):
-    my_file = MY_FILE.resolve()
-    hash_md5 = hashlib.md5()
-    hash_md5.update(my_file.read_bytes())
-    hash_md5.update(dependencies.encode(encoding="utf-8"))
-    return hash_md5.hexdigest()
-
+    result_content.append("")
+    new_file_content = "\n".join(result_content)
+    if file_content != new_file_content:
+        PYPROJECT_TOML_FILE_PATH.write_text(new_file_content)
+        return True
+    return False
 
-HASH_REGEXP = re.compile(r"# Hash of dependencies: (?P<hash>[a-f0-9]+)")
 
 if __name__ == "__main__":
     find_all_providers_and_provider_files()
@@ -381,16 +371,10 @@ if __name__ == "__main__":
     )
     new_dependencies = json.dumps(unique_sorted_dependencies, indent=2) + "\n"
     old_md5sum = MY_MD5SUM_FILE.read_text().strip() if MY_MD5SUM_FILE.exists() 
else ""
-    new_md5sum = calculate_my_hash()
-    find_hash = HASH_REGEXP.findall(PYPROJECT_TOML_FILE_PATH.read_text())
-    dependencies_hash_from_pyproject_toml = find_hash[0] if find_hash else ""
-    dependencies_hash = calculate_dependencies_hash(new_dependencies)
-    if (
-        new_dependencies != old_dependencies
-        or new_md5sum != old_md5sum
-        or dependencies_hash_from_pyproject_toml != dependencies_hash
-    ):
-        
DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, 
indent=2) + "\n")
+    old_content = DEPENDENCIES_JSON_FILE_PATH.read_text() if 
DEPENDENCIES_JSON_FILE_PATH.exists() else ""
+    new_content = json.dumps(unique_sorted_dependencies, indent=2) + "\n"
+    DEPENDENCIES_JSON_FILE_PATH.write_text(new_content)
+    if new_content != old_content:
         if os.environ.get("CI"):
             console.print()
             console.print(f"There is a need to regenerate 
{DEPENDENCIES_JSON_FILE_PATH}")
@@ -408,13 +392,15 @@ if __name__ == "__main__":
             )
             console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}")
             console.print()
-            update_pyproject_toml(unique_sorted_dependencies, 
dependencies_hash)
-            console.print(f"Written {PYPROJECT_TOML_FILE_PATH}")
+    if update_pyproject_toml(unique_sorted_dependencies):
+        if os.environ.get("CI"):
+            console.print(f"There is a need to regenerate 
{PYPROJECT_TOML_FILE_PATH}")
+            console.print(
+                f"[red]You need to run the following command locally and 
commit generated "
+                f"{PYPROJECT_TOML_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)} 
file:\n"
+            )
+            console.print("breeze static-checks --type 
update-providers-dependencies --all-files")
             console.print()
-            MY_MD5SUM_FILE.write_text(new_md5sum + "\n")
-        sys.exit(1)
-    else:
-        console.print(
-            "[green]No need to regenerate dependencies!\n[/]"
-            f"The 
{DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)} is up to 
date!\n"
-        )
+        else:
+            console.print(f"Written {PYPROJECT_TOML_FILE_PATH}")
+    console.print()

Reply via email to