This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 42017f5bd8 Protect against manually updated generated dependencies
(#37056)
42017f5bd8 is described below
commit 42017f5bd85749673c34fa0ef0f68151502eb50c
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sun Jan 28 15:09:32 2024 +0100
Protect against manually updated generated dependencies (#37056)
The "generated/provider_dependencies.json" is regenerated automatically
together with pyproject.toml from all the provider.yaml files. The
file contains information about dependencies for all providers and
it is used to determine a number of actions when we build providers
(for example it is used to automatically generated cross-provider
dependencies, and determine whether provider is ready to be released)
The "dependencies" from provider_dependencies.json are also
reflected in pyproject.toml file in order to determine what should
be installed when you install specific editable extra, so pyproject.toml
is also generated automatically together with
provider_dependencies.json. All is good when that generation is done
automatically, but so far, when you updated provider_dependencies.json
manually, the pyproject.toml was not regenerated - it was actually
skipped from regeneration.
This PR changes it by storing hash of provider_dependencies.json (mixed
with the hash of the script that generates them) in the generated
pyproject.toml file. This way, both - provider_dependencies and
pyproject.toml will always be regenerated by the
"update_providers_dependencies.py" pre-commit whenever there is any
inconsistency between provider.yaml files, provider_dependencies.json
or pyproject.toml.
---
pyproject.toml | 175 +++++++++++----------
.../pre_commit_update_providers_dependencies.py | 30 +++-
..._commit_update_providers_dependencies.py.md5sum | 2 +-
3 files changed, 115 insertions(+), 92 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index a84b4af06f..e82b5897f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -524,15 +524,16 @@ winrm = [
# If you want to modify these - modify the corresponding provider.yaml instead.
#############################################################################################################
# START OF GENERATED DEPENDENCIES
-airbyte = [
+# Hash of dependencies: 6e486f782b4745afd1f0f19dabe1253c
+airbyte = [ # source: airflow/providers/airbyte/provider.yaml
"apache-airflow[http]",
]
-alibaba = [
+alibaba = [ # source: airflow/providers/alibaba/provider.yaml
"alibabacloud_adb20211201>=1.0.0",
"alibabacloud_tea_openapi>=0.3.7",
"oss2>=2.14.0",
]
-amazon = [
+amazon = [ # source: airflow/providers/amazon/provider.yaml
"PyAthena>=3.0.10",
"apache-airflow[common_sql]",
"apache-airflow[http]",
@@ -554,82 +555,82 @@ amazon = [
"mypy-boto3-s3>=1.33.0",
"s3fs>=2023.10.0",
]
-apache-beam = [
+apache-beam = [ # source: airflow/providers/apache/beam/provider.yaml
"apache-beam>=2.53.0",
"pyarrow>=14.0.1",
]
-apache-cassandra = [
+apache-cassandra = [ # source: airflow/providers/apache/cassandra/provider.yaml
"cassandra-driver>=3.13.0",
]
-apache-drill = [
+apache-drill = [ # source: airflow/providers/apache/drill/provider.yaml
"apache-airflow[common_sql]",
"sqlalchemy-drill>=1.1.0",
]
-apache-druid = [
+apache-druid = [ # source: airflow/providers/apache/druid/provider.yaml
"apache-airflow[common_sql]",
"pydruid>=0.4.1",
]
-apache-flink = [
+apache-flink = [ # source: airflow/providers/apache/flink/provider.yaml
"apache-airflow[cncf_kubernetes]",
"cryptography>=2.0.0",
]
-apache-hdfs = [
+apache-hdfs = [ # source: airflow/providers/apache/hdfs/provider.yaml
"hdfs[avro,dataframe,kerberos]>=2.0.4",
]
-apache-hive = [
+apache-hive = [ # source: airflow/providers/apache/hive/provider.yaml
"apache-airflow[common_sql]",
"hmsclient>=0.1.0",
"pandas>=1.2.5",
"pyhive[hive-pure-sasl]>=0.7.0",
"thrift>=0.9.2",
]
-apache-impala = [
+apache-impala = [ # source: airflow/providers/apache/impala/provider.yaml
"impyla>=0.18.0,<1.0",
]
-apache-kafka = [
+apache-kafka = [ # source: airflow/providers/apache/kafka/provider.yaml
"asgiref",
"confluent-kafka>=1.8.2",
]
-apache-kylin = [
+apache-kylin = [ # source: airflow/providers/apache/kylin/provider.yaml
"kylinpy>=2.6",
]
-apache-livy = [
+apache-livy = [ # source: airflow/providers/apache/livy/provider.yaml
"aiohttp",
"apache-airflow[http]",
"asgiref",
]
-apache-pig = [
+apache-pig = [ # source: airflow/providers/apache/pig/provider.yaml
]
-apache-pinot = [
+apache-pinot = [ # source: airflow/providers/apache/pinot/provider.yaml
"apache-airflow[common_sql]",
"pinotdb>0.4.7",
]
-apache-spark = [
+apache-spark = [ # source: airflow/providers/apache/spark/provider.yaml
"grpcio-status>=1.59.0",
"pyspark",
]
-apprise = [
+apprise = [ # source: airflow/providers/apprise/provider.yaml
"apprise",
]
-arangodb = [
+arangodb = [ # source: airflow/providers/arangodb/provider.yaml
"python-arango>=7.3.2",
]
-asana = [
+asana = [ # source: airflow/providers/asana/provider.yaml
"asana>=0.10,<4.0.0",
]
-atlassian-jira = [
+atlassian-jira = [ # source: airflow/providers/atlassian/jira/provider.yaml
"atlassian-python-api>=1.14.2,!=3.41.6",
"beautifulsoup4",
]
-celery = [
+celery = [ # source: airflow/providers/celery/provider.yaml
"celery>=5.3.0,<6,!=5.3.3,!=5.3.2",
"flower>=1.0.0",
"google-re2>=1.0",
]
-cloudant = [
+cloudant = [ # source: airflow/providers/cloudant/provider.yaml
"cloudant>=2.0",
]
-cncf-kubernetes = [
+cncf-kubernetes = [ # source: airflow/providers/cncf/kubernetes/provider.yaml
"aiofiles>=23.2.0",
"asgiref>=3.5.2",
"cryptography>=2.0.0",
@@ -637,15 +638,15 @@ cncf-kubernetes = [
"kubernetes>=21.7.0,<24",
"kubernetes_asyncio>=18.20.1,<25",
]
-cohere = [
+cohere = [ # source: airflow/providers/cohere/provider.yaml
"cohere>=4.37",
]
-common-io = [
+common-io = [ # source: airflow/providers/common/io/provider.yaml
]
-common-sql = [
+common-sql = [ # source: airflow/providers/common/sql/provider.yaml
"sqlparse>=0.4.2",
]
-databricks = [
+databricks = [ # source: airflow/providers/databricks/provider.yaml
"aiohttp>=3.6.3, <4",
"apache-airflow[common_sql]",
"databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
@@ -653,48 +654,48 @@ databricks = [
# Devel dependencies for the databricks provider
"deltalake>=0.12.0",
]
-datadog = [
+datadog = [ # source: airflow/providers/datadog/provider.yaml
"datadog>=0.14.0",
]
-dbt-cloud = [
+dbt-cloud = [ # source: airflow/providers/dbt/cloud/provider.yaml
"aiohttp",
"apache-airflow[http]",
"asgiref",
]
-dingding = [
+dingding = [ # source: airflow/providers/dingding/provider.yaml
"apache-airflow[http]",
]
-discord = [
+discord = [ # source: airflow/providers/discord/provider.yaml
"apache-airflow[http]",
]
-docker = [
+docker = [ # source: airflow/providers/docker/provider.yaml
"docker>=5.0.3",
"python-dotenv>=0.21.0",
]
-elasticsearch = [
+elasticsearch = [ # source: airflow/providers/elasticsearch/provider.yaml
"apache-airflow[common_sql]",
"elasticsearch>=8.10,<9",
]
-exasol = [
+exasol = [ # source: airflow/providers/exasol/provider.yaml
"apache-airflow[common_sql]",
"pandas>=1.2.5",
"pyexasol>=0.5.1",
]
-fab = [
+fab = [ # source: airflow/providers/fab/provider.yaml
"flask-appbuilder==4.3.10",
"flask-login>=0.6.2",
"flask>=2.2,<2.3",
"google-re2>=1.0",
]
-facebook = [
+facebook = [ # source: airflow/providers/facebook/provider.yaml
"facebook-business>=6.0.2",
]
-ftp = [
+ftp = [ # source: airflow/providers/ftp/provider.yaml
]
-github = [
+github = [ # source: airflow/providers/github/provider.yaml
"PyGithub!=1.58",
]
-google = [
+google = [ # source: airflow/providers/google/provider.yaml
"PyOpenSSL",
"apache-airflow[common_sql]",
"asgiref>=3.5.2",
@@ -754,34 +755,34 @@ google = [
"sqlalchemy-bigquery>=1.2.1",
"sqlalchemy-spanner>=1.6.2",
]
-grpc = [
+grpc = [ # source: airflow/providers/grpc/provider.yaml
"google-auth-httplib2>=0.0.1",
"google-auth>=1.0.0, <3.0.0",
"grpcio>=1.15.0",
]
-hashicorp = [
+hashicorp = [ # source: airflow/providers/hashicorp/provider.yaml
"hvac>=1.1.0",
]
-http = [
+http = [ # source: airflow/providers/http/provider.yaml
"aiohttp",
"asgiref",
"requests>=2.26.0",
"requests_toolbelt",
]
-imap = [
+imap = [ # source: airflow/providers/imap/provider.yaml
]
-influxdb = [
+influxdb = [ # source: airflow/providers/influxdb/provider.yaml
"influxdb-client>=1.19.0",
"requests>=2.26.0",
]
-jdbc = [
+jdbc = [ # source: airflow/providers/jdbc/provider.yaml
"apache-airflow[common_sql]",
"jaydebeapi>=1.1.1",
]
-jenkins = [
+jenkins = [ # source: airflow/providers/jenkins/provider.yaml
"python-jenkins>=1.0.0",
]
-microsoft-azure = [
+microsoft-azure = [ # source: airflow/providers/microsoft/azure/provider.yaml
"adal>=1.2.7",
"adlfs>=2023.10.0",
"azure-batch>=8.0.0",
@@ -806,147 +807,147 @@ microsoft-azure = [
# Devel dependencies for the microsoft.azure provider
"pywinrm",
]
-microsoft-mssql = [
+microsoft-mssql = [ # source: airflow/providers/microsoft/mssql/provider.yaml
"apache-airflow[common_sql]",
"pymssql>=2.1.8",
]
-microsoft-psrp = [
+microsoft-psrp = [ # source: airflow/providers/microsoft/psrp/provider.yaml
"pypsrp>=0.8.0",
]
-microsoft-winrm = [
+microsoft-winrm = [ # source: airflow/providers/microsoft/winrm/provider.yaml
"pywinrm>=0.4",
]
-mongo = [
+mongo = [ # source: airflow/providers/mongo/provider.yaml
"dnspython>=1.13.0",
"pymongo>=3.6.0",
# Devel dependencies for the mongo provider
"mongomock",
]
-mysql = [
+mysql = [ # source: airflow/providers/mysql/provider.yaml
"apache-airflow[common_sql]",
"mysql-connector-python>=8.0.29",
"mysqlclient>=1.3.6",
]
-neo4j = [
+neo4j = [ # source: airflow/providers/neo4j/provider.yaml
"neo4j>=4.2.1",
]
-odbc = [
+odbc = [ # source: airflow/providers/odbc/provider.yaml
"apache-airflow[common_sql]",
"pyodbc",
]
-openai = [
+openai = [ # source: airflow/providers/openai/provider.yaml
"openai[datalib]>=1.0",
]
-openfaas = [
+openfaas = [ # source: airflow/providers/openfaas/provider.yaml
]
-openlineage = [
+openlineage = [ # source: airflow/providers/openlineage/provider.yaml
"apache-airflow[common_sql]",
"attrs>=22.2",
"openlineage-integration-common>=0.28.0",
"openlineage-python>=0.28.0",
]
-opensearch = [
+opensearch = [ # source: airflow/providers/opensearch/provider.yaml
"opensearch-py>=2.2.0",
]
-opsgenie = [
+opsgenie = [ # source: airflow/providers/opsgenie/provider.yaml
"opsgenie-sdk>=2.1.5",
]
-oracle = [
+oracle = [ # source: airflow/providers/oracle/provider.yaml
"apache-airflow[common_sql]",
"oracledb>=1.0.0",
]
-pagerduty = [
+pagerduty = [ # source: airflow/providers/pagerduty/provider.yaml
"pdpyras>=4.1.2",
]
-papermill = [
+papermill = [ # source: airflow/providers/papermill/provider.yaml
"ipykernel",
"papermill[all]>=2.4.0",
"scrapbook[all]",
]
-pgvector = [
+pgvector = [ # source: airflow/providers/pgvector/provider.yaml
"apache-airflow[postgres]",
"pgvector>=0.2.3",
]
-pinecone = [
+pinecone = [ # source: airflow/providers/pinecone/provider.yaml
"pinecone-client>=2.2.4,<3.0",
]
-postgres = [
+postgres = [ # source: airflow/providers/postgres/provider.yaml
"apache-airflow[common_sql]",
"psycopg2-binary>=2.8.0",
]
-presto = [
+presto = [ # source: airflow/providers/presto/provider.yaml
"apache-airflow[common_sql]",
"pandas>=1.2.5",
"presto-python-client>=0.8.4",
]
-redis = [
+redis = [ # source: airflow/providers/redis/provider.yaml
"redis>=4.5.2,<5.0.0,!=4.5.5",
]
-salesforce = [
+salesforce = [ # source: airflow/providers/salesforce/provider.yaml
"pandas>=1.2.5",
"simple-salesforce>=1.0.0",
]
-samba = [
+samba = [ # source: airflow/providers/samba/provider.yaml
"smbprotocol>=1.5.0",
]
-segment = [
+segment = [ # source: airflow/providers/segment/provider.yaml
"analytics-python>=1.2.9",
]
-sendgrid = [
+sendgrid = [ # source: airflow/providers/sendgrid/provider.yaml
"sendgrid>=6.0.0",
]
-sftp = [
+sftp = [ # source: airflow/providers/sftp/provider.yaml
"apache-airflow[ssh]",
"paramiko>=2.8.0",
]
-singularity = [
+singularity = [ # source: airflow/providers/singularity/provider.yaml
"spython>=0.0.56",
]
-slack = [
+slack = [ # source: airflow/providers/slack/provider.yaml
"apache-airflow[common_sql]",
"slack_sdk>=3.19.0",
]
-smtp = [
+smtp = [ # source: airflow/providers/smtp/provider.yaml
]
-snowflake = [
+snowflake = [ # source: airflow/providers/snowflake/provider.yaml
"apache-airflow[common_sql]",
"snowflake-connector-python>=2.7.8",
"snowflake-sqlalchemy>=1.1.0",
]
-sqlite = [
+sqlite = [ # source: airflow/providers/sqlite/provider.yaml
"apache-airflow[common_sql]",
]
-ssh = [
+ssh = [ # source: airflow/providers/ssh/provider.yaml
"paramiko>=2.6.0",
"sshtunnel>=0.3.2",
]
-tableau = [
+tableau = [ # source: airflow/providers/tableau/provider.yaml
"tableauserverclient",
]
-tabular = [
+tabular = [ # source: airflow/providers/tabular/provider.yaml
# Devel dependencies for the tabular provider
"pyiceberg>=0.5.0",
]
-telegram = [
+telegram = [ # source: airflow/providers/telegram/provider.yaml
"python-telegram-bot>=20.2",
]
-trino = [
+trino = [ # source: airflow/providers/trino/provider.yaml
"apache-airflow[common_sql]",
"pandas>=1.2.5",
"trino>=0.318.0",
]
-vertica = [
+vertica = [ # source: airflow/providers/vertica/provider.yaml
"apache-airflow[common_sql]",
"vertica-python>=0.5.1",
]
-weaviate = [
+weaviate = [ # source: airflow/providers/weaviate/provider.yaml
"pandas>=1.2.5",
"weaviate-client>=3.24.2",
]
-yandex = [
+yandex = [ # source: airflow/providers/yandex/provider.yaml
"yandexcloud>=0.228.0",
]
-zendesk = [
+zendesk = [ # source: airflow/providers/zendesk/provider.yaml
"zenpy>=2.0.24",
]
all = [
diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py
b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py
index 50cebee3e5..ca502e0d0b 100755
--- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py
+++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py
@@ -20,6 +20,7 @@ from __future__ import annotations
import hashlib
import json
import os
+import re
import sys
from ast import Import, ImportFrom, NodeVisitor, parse
from collections import defaultdict
@@ -237,7 +238,10 @@ def generate_dependencies(
for dependency, dependency_info in dependencies.items():
if dependency_info["state"] in ["suspended", "removed"]:
continue
- result_content.append(f"{normalize_extra(dependency)} = [")
+ result_content.append(
+ f"{normalize_extra(dependency)} = "
+ f"[ # source: airflow/providers/{dependency.replace('.',
'/')}/provider.yaml"
+ )
deps = dependency_info["deps"]
if not isinstance(deps, list):
raise TypeError(f"Wrong type of 'deps' {deps} for {dependency} in
{DEPENDENCIES_JSON_FILE_PATH}")
@@ -280,7 +284,7 @@ def get_dependency_type(dependency_type: str) ->
ParsedDependencyTypes | None:
return None
-def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]):
+def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]],
dependencies_hash: str):
file_content = PYPROJECT_TOML_FILE_PATH.read_text()
result_content: list[str] = []
copying = True
@@ -291,6 +295,7 @@ def update_pyproject_toml(dependencies: dict[str, dict[str,
list[str] | str]]):
result_content.append(line)
if line.strip().startswith(GENERATED_DEPENDENCIES_START):
copying = False
+ result_content.append(f"# Hash of dependencies:
{dependencies_hash}")
generate_dependencies(result_content, dependencies)
elif line.strip().startswith(GENERATED_DEPENDENCIES_END):
copying = True
@@ -325,6 +330,16 @@ def calculate_my_hash():
return hash_md5.hexdigest()
+def calculate_dependencies_hash(dependencies: str):
+ my_file = MY_FILE.resolve()
+ hash_md5 = hashlib.md5()
+ hash_md5.update(my_file.read_bytes())
+ hash_md5.update(dependencies.encode(encoding="utf-8"))
+ return hash_md5.hexdigest()
+
+
+HASH_REGEXP = re.compile(r"# Hash of dependencies: (?P<hash>[a-f0-9]+)")
+
if __name__ == "__main__":
find_all_providers_and_provider_files()
num_files = len(ALL_PROVIDER_FILES)
@@ -367,7 +382,14 @@ if __name__ == "__main__":
new_dependencies = json.dumps(unique_sorted_dependencies, indent=2) + "\n"
old_md5sum = MY_MD5SUM_FILE.read_text().strip() if MY_MD5SUM_FILE.exists()
else ""
new_md5sum = calculate_my_hash()
- if new_dependencies != old_dependencies or new_md5sum != old_md5sum:
+ find_hash = HASH_REGEXP.findall(PYPROJECT_TOML_FILE_PATH.read_text())
+ dependencies_hash_from_pyproject_toml = find_hash[0] if find_hash else ""
+ dependencies_hash = calculate_dependencies_hash(new_dependencies)
+ if (
+ new_dependencies != old_dependencies
+ or new_md5sum != old_md5sum
+ or dependencies_hash_from_pyproject_toml != dependencies_hash
+ ):
DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies,
indent=2) + "\n")
if os.environ.get("CI"):
console.print()
@@ -386,7 +408,7 @@ if __name__ == "__main__":
)
console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}")
console.print()
- update_pyproject_toml(unique_sorted_dependencies)
+ update_pyproject_toml(unique_sorted_dependencies,
dependencies_hash)
console.print(f"Written {PYPROJECT_TOML_FILE_PATH}")
console.print()
MY_MD5SUM_FILE.write_text(new_md5sum + "\n")
diff --git
a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum
b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum
index 610f5562c8..0bce5d16b0 100644
--- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum
+++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum
@@ -1 +1 @@
-ed25c4f6b220c14b40bbf370fee9388e
+5f442e24a09b079464bde7b552f812d1