This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 1024802d68 Optimize docs building in CI (#30825)
1024802d68 is described below
commit 1024802d68c13452a94ebbf7c859abd6e19d6419
Author: Jarek Potiuk <[email protected]>
AuthorDate: Mon Apr 24 08:03:25 2023 +0200
Optimize docs building in CI (#30825)
* Optimize docs building in CI
Docs building is the longest build for regular PRs - it takes 30 minutes
for any PR that touches any of the docs or python files.
This PR optimises it - only the affected packages will be built when
the PR touches only some of the files.
* fixup! Optimize docs building in CI
* fixup! fixup! Optimize docs building in CI
* fixup! fixup! fixup! Optimize docs building in CI
---
dev/breeze/SELECTIVE_CHECKS.md | 96 ++++++++-------
.../src/airflow_breeze/utils/selective_checks.py | 57 +++++++--
dev/breeze/tests/test_selective_checks.py | 133 +++++++++++++++++++++
3 files changed, 229 insertions(+), 57 deletions(-)
diff --git a/dev/breeze/SELECTIVE_CHECKS.md b/dev/breeze/SELECTIVE_CHECKS.md
index 73827a807a..0c6bfa51b3 100644
--- a/dev/breeze/SELECTIVE_CHECKS.md
+++ b/dev/breeze/SELECTIVE_CHECKS.md
@@ -103,51 +103,57 @@ The logic implements the following rules:
* if `Image building` is disabled, only basic pre-commits are enabled - no
'image-depending` pre-commits
are enabled.
* If there are some setup files changed, `upgrade to newer dependencies` is
enabled.
+* If docs are built, the `docs-filter` will determine which docs packages to
build. This is based on
+ several criteria: if any of the airflow core, charts, docker-stack,
providers files or docs have changed,
+ then corresponding packages are built (including cross-dependent providers).
If any of the core files
+ changed, also providers docs are built because all providers depend on
airflow docs. If any of the docs
+ build python files changed or when build is "canary" type in main - all docs
packages are built.
The selective check outputs available are described below:
-| Output | Meaning of the output
| Example value
|
-|------------------------------------|--------------------------------------------------------------------------------------------------------|---------------------------------------------------------------|
-| all-python-versions | List of all python versions there are
available in the form of JSON array | ['3.7',
'3.8', '3.9', '3.10'] |
-| all-python-versions-list-as-string | List of all python versions there are
available in the form of space separated string | 3.7 3.8 3.9
3.10 |
-| basic-checks-only | Whether to run all static checks
("false") or only basic set of static checks ("true") | false
|
-| cache-directive | Which cache should be be used for
images ("registry", "local" , "disabled") | registry
|
-| debug-resources | Whether resources usage should be
printed during parallel job execution ("true"/ "false") | false
|
-| default-branch | Which branch is default for the the
build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main
|
-| default-constraints-branch | Which branch is default for the the
build ("constraints-main" for main branch, "constraints-2-4" etc.) |
constraints-main |
-| default-helm-version | Which Helm version to use as default
| v3.9.4
|
-| default-kind-version | Which Kind version to use as default
| v0.16.0
|
-| default-kubernetes-version | Which Kubernetes version to use as
default | v1.25.2
|
-| default-mssql-version | Which MsSQL version to use as default
| 2017-latest
|
-| default-mysql-version | Which MySQL version to use as default
| 5.7
|
-| default-postgres-version | Which Postgres version to use as
default | 10
|
-| default-python-version | Which Python version to use as default
| 3.7
|
-| docs-build | Whether to build documentation
("true"/"false") | true
|
-| docs-filter | What filter to apply to docs building -
used in non-main branches to skip provider and chart docs. |
--package-filter apache-airflow --package-filter docker-stack |
-| full-tests-needed | Whether this build runs complete set of
tests or only subset (for faster PR builds). | false
|
-| helm-version | Which Helm version to use for tests
| v3.9.4
|
-| image-build | Whether CI image build is needed
| true
|
-| kind-version | Which Kind version to use for tests
| v0.16.0
|
-| kubernetes-combos | All combinations of Python version and
Kubernetes version to use for tests as space-separated string | 3.7-v1.25.2
3.8-v1.26.4 |
-| kubernetes-versions | All Kubernetes versions to use for
tests as JSON array |
['v1.25.2'] |
-| kubernetes-versions-list-as-string | All Kubernetes versions to use for
tests as space-separated string | v1.25.2
|
-| mssql-exclude | Which versions of MsSQL to exclude for
tests as JSON array | []
|
-| mssql-versions | Which versions of MsSQL to use for
tests as JSON array |
['2017-latest'] |
-| mysql-exclude | Which versions of MySQL to exclude for
tests as JSON array | []
|
-| mysql-versions | Which versions of MySQL to use for
tests as JSON array | ['5.7']
|
-| needs-api-codegen | Whether "api-codegen" are needed to run
("true"/"false") | true
|
-| needs-api-tests | Whether "api-tests" are needed to run
("true"/"false") | true
|
-| needs-helm-tests | Whether Helm tests are needed to run
("true"/"false") | true
|
-| needs-javascript-scans | Whether javascript CodeQL scans should
be run ("true"/"false") | true
|
-| needs-python-scans | Whether Python CodeQL scans should be
run ("true"/"false") | true
|
-| postgres-exclude | Which versions of Postgres to exclude
for tests as JSON array | []
|
-| postgres-versions | Which versions of Postgres to use for
tests as JSON array | ['10']
|
-| python-versions | Which versions of Python to use for
tests as JSON array | ['3.7']
|
-| python-versions-list-as-string | Which versions of MySQL to use for
tests as space-separated string | 3.7
|
-| run-kubernetes-tests | Whether Kubernetes tests should be run
("true"/"false") | true
|
-| run-tests | Whether unit tests should be run
("true"/"false") | true
|
-| run-www-tests | Whether WWW tests should be run
("true"/"false") | true
|
-| skip-pre-commits | Which pre-commits should be skipped
during the static-checks run | identity
|
-| sqlite-exclude | Which versions of Sqlite to exclude for
tests as JSON array | []
|
-| test-types | Which test types should be run for unit
tests | API Always CLI
Core Integration Other Providers WWW |
-| upgrade-to-newer-dependencies | Whether the image build should attempt
to upgrade all dependencies | false
|
+| Output | Meaning of the output
| Example value
|
+|------------------------------------|---------------------------------------------------------------------------------------------------------|-----------------------------------------------------|
+| all-python-versions | List of all python versions there are
available in the form of JSON array | ['3.7',
'3.8', '3.9', '3.10'] |
+| all-python-versions-list-as-string | List of all python versions there are
available in the form of space separated string | 3.7 3.8 3.9
3.10 |
+| basic-checks-only | Whether to run all static checks
("false") or only basic set of static checks ("true") | false
|
+| cache-directive | Which cache should be used for
images ("registry", "local" , "disabled") |
registry |
+| debug-resources | Whether resources usage should be
printed during parallel job execution ("true"/ "false") | false
|
+| default-branch | Which branch is default for the
build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main
|
+| default-constraints-branch | Which branch is default for the
build ("constraints-main" for main branch, "constraints-2-4" etc.) |
constraints-main |
+| default-helm-version | Which Helm version to use as default
| v3.9.4
|
+| default-kind-version | Which Kind version to use as default
| v0.16.0
|
+| default-kubernetes-version | Which Kubernetes version to use as
default | v1.25.2
|
+| default-mssql-version | Which MsSQL version to use as default
| 2017-latest
|
+| default-mysql-version | Which MySQL version to use as default
| 5.7
|
+| default-postgres-version | Which Postgres version to use as
default | 10
|
+| default-python-version | Which Python version to use as default
| 3.7
|
+| docs-build | Whether to build documentation
("true"/"false") | true
|
+| docs-filter | What filter to apply to docs building -
based on which documentation packages should be built |
--package-filter apache-airflow - |
+| full-tests-needed | Whether this build runs complete set of
tests or only subset (for faster PR builds) | false
|
+| helm-version | Which Helm version to use for tests
| v3.9.4
|
+| image-build | Whether CI image build is needed
| true
|
+| kind-version | Which Kind version to use for tests
| v0.16.0
|
+| kubernetes-combos | All combinations of Python version and
Kubernetes version to use for tests as space-separated string | 3.7-v1.25.2
3.8-v1.26.4 |
+| kubernetes-versions | All Kubernetes versions to use for
tests as JSON array |
['v1.25.2'] |
+| kubernetes-versions-list-as-string | All Kubernetes versions to use for
tests as space-separated string | v1.25.2
|
+| mssql-exclude | Which versions of MsSQL to exclude for
tests as JSON array | []
|
+| mssql-versions | Which versions of MsSQL to use for
tests as JSON array |
['2017-latest'] |
+| mysql-exclude | Which versions of MySQL to exclude for
tests as JSON array | []
|
+| mysql-versions | Which versions of MySQL to use for
tests as JSON array | ['5.7']
|
+| needs-api-codegen | Whether "api-codegen" are needed to run
("true"/"false") | true
|
+| needs-api-tests | Whether "api-tests" are needed to run
("true"/"false") | true
|
+| needs-helm-tests | Whether Helm tests are needed to run
("true"/"false") | true
|
+| needs-javascript-scans | Whether javascript CodeQL scans should
be run ("true"/"false") | true
|
+| needs-python-scans | Whether Python CodeQL scans should be
run ("true"/"false") | true
|
+| parallel-test-types | Which test types should be run for unit
tests | API Always
Providers\[amazon\] Providers\[-amazon\] |
+| postgres-exclude | Which versions of Postgres to exclude
for tests as JSON array | []
|
+| postgres-versions | Which versions of Postgres to use for
tests as JSON array | ['10']
|
+| python-versions | Which versions of Python to use for
tests as JSON array | ['3.7']
|
+| python-versions-list-as-string | Which versions of Python to use for
tests as space-separated string | 3.7
|
+| run-kubernetes-tests | Whether Kubernetes tests should be run
("true"/"false") | true
|
+| run-tests | Whether unit tests should be run
("true"/"false") | true
|
+| run-www-tests | Whether WWW tests should be run
("true"/"false") | true
|
+| skip-pre-commits | Which pre-commits should be skipped
during the static-checks run | identity
|
+| skip-provider-tests | When provider tests should be skipped
(on non-main branch) | identity
|
+| sqlite-exclude | Which versions of Sqlite to exclude for
tests as JSON array | []
|
+| upgrade-to-newer-dependencies | Whether the image build should attempt
to upgrade all dependencies (might be true/false or commit hash) | false
|
diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index 97fa9d6ff5..a54e7c86e1 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -27,6 +27,7 @@ from airflow_breeze.utils.kubernetes_utils import
get_kubernetes_python_combos
from airflow_breeze.utils.path_utils import (
AIRFLOW_PROVIDERS_ROOT,
AIRFLOW_SOURCES_ROOT,
+ DOCS_DIR,
SYSTEM_TESTS_PROVIDERS_ROOT,
TESTS_PROVIDERS_ROOT,
)
@@ -41,6 +42,8 @@ from functools import lru_cache
from re import match
from typing import Any, Dict, List, TypeVar
+from typing_extensions import Literal
+
from airflow_breeze.global_constants import (
ALL_PYTHON_MAJOR_MINOR_VERSIONS,
CURRENT_KUBERNETES_VERSIONS,
@@ -195,7 +198,7 @@ TEST_TYPE_MATCHES = HashableDict(
)
-def find_provider_affected(changed_file: str) -> str | None:
+def find_provider_affected(changed_file: str, include_docs: bool) -> str |
None:
file_path = AIRFLOW_SOURCES_ROOT / changed_file
# is_relative_to is only available in Python 3.9 - we should simplify this
check when we are Python 3.9+
for provider_root in (TESTS_PROVIDERS_ROOT, SYSTEM_TESTS_PROVIDERS_ROOT,
AIRFLOW_PROVIDERS_ROOT):
@@ -206,6 +209,13 @@ def find_provider_affected(changed_file: str) -> str |
None:
except ValueError:
pass
else:
+ if include_docs:
+ try:
+ relative_path = file_path.relative_to(DOCS_DIR)
+ if
relative_path.parts[0].startswith("apache-airflow-providers-"):
+ return
relative_path.parts[0].replace("apache-airflow-providers-", "").replace("-",
".")
+ except ValueError:
+ pass
return None
for parent_dir_path in file_path.parents:
@@ -230,13 +240,15 @@ def add_dependent_providers(
providers.add(dep_name)
-def find_all_providers_affected(changed_files: tuple[str, ...]) -> set[str]:
+def find_all_providers_affected(
+ changed_files: tuple[str, ...], include_docs: bool
+) -> list[str] | Literal["ALL_PROVIDERS"] | None:
all_providers: set[str] = set()
dependencies = json.loads((AIRFLOW_SOURCES_ROOT / "generated" /
"provider_dependencies.json").read_text())
all_providers_affected = False
suspended_providers: set[str] = set()
for changed_file in changed_files:
- provider = find_provider_affected(changed_file)
+ provider = find_provider_affected(changed_file,
include_docs=include_docs)
if provider == "Providers":
all_providers_affected = True
elif provider is not None:
@@ -245,7 +257,7 @@ def find_all_providers_affected(changed_files: tuple[str,
...]) -> set[str]:
else:
all_providers.add(provider)
if all_providers_affected:
- return set()
+ return "ALL_PROVIDERS"
if suspended_providers:
# We check for suspended providers only after we have checked if all
providers are affected.
# No matter if we found that we are modifying a suspended provider
individually, if all providers are
@@ -265,9 +277,11 @@ def find_all_providers_affected(changed_files: tuple[str,
...]) -> set[str]:
)
get_console().print(f"Suspended providers: {suspended_providers}")
sys.exit(1)
+ if len(all_providers) == 0:
+ return None
for provider in list(all_providers):
add_dependent_providers(all_providers, provider, dependencies)
- return all_providers
+ return sorted(all_providers)
class SelectiveChecks:
@@ -570,8 +584,10 @@ class SelectiveChecks:
candidate_test_types.update(all_selective_test_types())
else:
if "Providers" in candidate_test_types:
- affected_providers =
find_all_providers_affected(changed_files=self._files)
- if len(affected_providers) != 0:
+ affected_providers = find_all_providers_affected(
+ changed_files=self._files, include_docs=False
+ )
+ if affected_providers != "ALL_PROVIDERS" and
affected_providers is not None:
candidate_test_types.remove("Providers")
candidate_test_types.add(f"Providers[{','.join(sorted(affected_providers))}]")
get_console().print(
@@ -662,11 +678,28 @@ class SelectiveChecks:
@cached_property
def docs_filter(self) -> str:
- return (
- ""
- if self._default_branch == "main"
- else "--package-filter apache-airflow --package-filter
docker-stack"
- )
+ if self._default_branch != "main":
+ return "--package-filter apache-airflow --package-filter
docker-stack"
+ if self.full_tests_needed:
+ return ""
+ providers_affected =
find_all_providers_affected(changed_files=self._files, include_docs=True)
+ if (
+ providers_affected == "ALL_PROVIDERS"
+ or "docs/conf.py" in self._files
+ or "docs/build_docs.py" in self._files
+ ):
+ return ""
+ packages = []
+ if any([file.startswith("airflow/") for file in self._files]):
+ packages.append("apache-airflow")
+ if any([file.startswith("chart/") or
file.startswith("docs/helm-chart") for file in self._files]):
+ packages.append("helm-chart")
+ if any([file.startswith("docs/docker-stack/") for file in
self._files]):
+ packages.append("docker-stack")
+ if providers_affected:
+ for provider in providers_affected:
+
packages.append(f"apache-airflow-providers-{provider.replace('.', '-')}")
+ return " ".join([f"--package-filter {package}" for package in
packages])
@cached_property
def skip_pre_commits(self) -> str:
diff --git a/dev/breeze/tests/test_selective_checks.py
b/dev/breeze/tests/test_selective_checks.py
index a44d6f4dba..aa475df886 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -374,6 +374,7 @@ def test_expected_output_pull_request_main(
"image-build": "true",
"run-tests": "true",
"docs-build": "true",
+ "docs-filter": "",
"full-tests-needed": "true",
"upgrade-to-newer-dependencies": "false",
"parallel-test-types": "Core Providers[-amazon,google]
Other Providers[amazon] WWW "
@@ -398,6 +399,7 @@ def test_expected_output_pull_request_main(
"image-build": "true",
"run-tests": "true",
"docs-build": "true",
+ "docs-filter": "",
"full-tests-needed": "true",
"upgrade-to-newer-dependencies": "false",
"parallel-test-types": "Core Providers[-amazon,google]
Other Providers[amazon] WWW "
@@ -420,6 +422,7 @@ def test_expected_output_pull_request_main(
"image-build": "true",
"run-tests": "true",
"docs-build": "true",
+ "docs-filter": "",
"full-tests-needed": "true",
"upgrade-to-newer-dependencies": "false",
"parallel-test-types": "Core Providers[-amazon,google]
Other Providers[amazon] WWW "
@@ -669,6 +672,7 @@ def test_expected_output_pull_request_target(
"needs-helm-tests": "true",
"run-tests": "true",
"docs-build": "true",
+ "docs-filter": "",
"upgrade-to-newer-dependencies": "true",
"parallel-test-types": "Core Providers[-amazon,google] Other
Providers[amazon] WWW "
"API Always CLI Providers[google]",
@@ -686,6 +690,7 @@ def test_expected_output_pull_request_target(
"needs-helm-tests": "false",
"run-tests": "true",
"docs-build": "true",
+ "docs-filter": "--package-filter apache-airflow
--package-filter docker-stack",
"upgrade-to-newer-dependencies": "true",
"parallel-test-types": "Core Other WWW API Always CLI",
},
@@ -703,6 +708,7 @@ def test_expected_output_pull_request_target(
"needs-helm-tests": "true",
"run-tests": "true",
"docs-build": "true",
+ "docs-filter": "",
"upgrade-to-newer-dependencies": "true",
"parallel-test-types": "Core Providers[-amazon,google] Other
Providers[amazon] WWW "
"API Always CLI Providers[google]",
@@ -812,3 +818,130 @@ def test_upgrade_to_newer_dependencies(files: tuple[str,
...], expected_outputs:
default_branch="main",
)
assert_outputs_are_printed(expected_outputs, str(stderr))
+
+
+@pytest.mark.parametrize(
+ "files, expected_outputs,",
+ [
+ pytest.param(
+ ("docs/apache-airflow-providers-google/docs.rst",),
+ {
+ "docs-filter": "--package-filter
apache-airflow-providers-amazon "
+ "--package-filter apache-airflow-providers-apache-beam "
+ "--package-filter apache-airflow-providers-apache-cassandra "
+ "--package-filter apache-airflow-providers-cncf-kubernetes "
+ "--package-filter apache-airflow-providers-common-sql "
+ "--package-filter apache-airflow-providers-facebook "
+ "--package-filter apache-airflow-providers-google "
+ "--package-filter apache-airflow-providers-hashicorp "
+ "--package-filter apache-airflow-providers-microsoft-azure "
+ "--package-filter apache-airflow-providers-microsoft-mssql "
+ "--package-filter apache-airflow-providers-mysql "
+ "--package-filter apache-airflow-providers-oracle "
+ "--package-filter apache-airflow-providers-postgres "
+ "--package-filter apache-airflow-providers-presto "
+ "--package-filter apache-airflow-providers-salesforce "
+ "--package-filter apache-airflow-providers-sftp "
+ "--package-filter apache-airflow-providers-ssh "
+ "--package-filter apache-airflow-providers-trino",
+ },
+ id="Google provider docs changed",
+ ),
+ pytest.param(
+ ("airflow/providers/common/sql/common_sql_python.py",),
+ {
+ "docs-filter": "--package-filter apache-airflow "
+ "--package-filter apache-airflow-providers-amazon "
+ "--package-filter apache-airflow-providers-apache-drill "
+ "--package-filter apache-airflow-providers-apache-druid "
+ "--package-filter apache-airflow-providers-apache-hive "
+ "--package-filter apache-airflow-providers-apache-impala "
+ "--package-filter apache-airflow-providers-apache-pinot "
+ "--package-filter apache-airflow-providers-common-sql "
+ "--package-filter apache-airflow-providers-databricks "
+ "--package-filter apache-airflow-providers-elasticsearch "
+ "--package-filter apache-airflow-providers-exasol "
+ "--package-filter apache-airflow-providers-google "
+ "--package-filter apache-airflow-providers-jdbc "
+ "--package-filter apache-airflow-providers-microsoft-mssql "
+ "--package-filter apache-airflow-providers-mysql "
+ "--package-filter apache-airflow-providers-odbc "
+ "--package-filter apache-airflow-providers-oracle "
+ "--package-filter apache-airflow-providers-postgres "
+ "--package-filter apache-airflow-providers-presto "
+ "--package-filter apache-airflow-providers-qubole "
+ "--package-filter apache-airflow-providers-slack "
+ "--package-filter apache-airflow-providers-snowflake "
+ "--package-filter apache-airflow-providers-sqlite "
+ "--package-filter apache-airflow-providers-trino "
+ "--package-filter apache-airflow-providers-vertica",
+ },
+ id="Common SQL provider package python files changed",
+ ),
+ pytest.param(
+ ("docs/apache-airflow-providers-airbyte/docs.rst",),
+ {
+ "docs-filter": "--package-filter
apache-airflow-providers-airbyte "
+ "--package-filter apache-airflow-providers-http",
+ },
+ id="Airbyte provider docs changed",
+ ),
+ pytest.param(
+ ("airflow/providers/celery/file.py",),
+ {
+ "docs-filter": "--package-filter apache-airflow "
+ "--package-filter apache-airflow-providers-celery",
+ },
+ id="Celery python files changed",
+ ),
+ pytest.param(
+ ("docs/conf.py",),
+ {
+ "docs-filter": "",
+ },
+ id="Docs conf.py changed",
+ ),
+ pytest.param(
+ ("airflow/test.py",),
+ {
+ "docs-filter": "--package-filter apache-airflow",
+ },
+ id="Core files changed. No provider docs to build",
+ ),
+ pytest.param(
+ ("docs/docker-stack/test.rst",),
+ {"docs-filter": "--package-filter docker-stack"},
+ id="Docker stack files changed. No provider docs to build",
+ ),
+ pytest.param(
+ ("airflow/test.py", "chart/airflow/values.yaml"),
+ {
+ "docs-filter": "--package-filter apache-airflow
--package-filter helm-chart",
+ },
+ id="Core files and helm chart files changed. No provider docs to
build",
+ ),
+ pytest.param(
+ ("chart/airflow/values.yaml",),
+ {
+ "docs-filter": "--package-filter helm-chart",
+ },
+ id="Helm chart files changed. No provider, airflow docs to build",
+ ),
+ pytest.param(
+ ("docs/helm-chart/airflow/values.yaml",),
+ {
+ "docs-filter": "--package-filter helm-chart",
+ },
+ id="Docs helm chart files changed. No provider, airflow docs to
build",
+ ),
+ ],
+)
+def test_docs_filter(files: tuple[str, ...], expected_outputs: dict[str, str]):
+ stderr = SelectiveChecks(
+ files=files,
+ commit_ref="HEAD",
+ github_event=GithubEvents.PULL_REQUEST,
+ pr_labels=(),
+ default_branch="main",
+ )
+ assert_outputs_are_printed(expected_outputs, str(stderr))