This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 4e3c96f58a4 Fix generation of commit history after moving providers
(#43412)
4e3c96f58a4 is described below
commit 4e3c96f58a4aee6a5151b9bc4bd55768e7fc0a54
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sun Oct 27 12:39:52 2024 +0100
Fix generation of commit history after moving providers (#43412)
Another teething problem after moving providers in #42505. After
moving providers, the history of the current folder in "providers"
only contains changes after the move - it does not include
changes from before the move - and since we always regenerate
the full list of commits - they were missing.
We cannot use `--follow` - because `git log --follow` only
works for single files, not directories, but since the move
was very predictable ("airflow/providers/nnn" ->
"airflow/providers/src/airflow/providers/nnn") we can add the old
path to `git log` command to get both - pre and post move commit
history.
---
.../prepare_providers/provider_documentation.py | 33 +++++++++++++++-------
dev/breeze/src/airflow_breeze/utils/packages.py | 7 +++++
dev/breeze/src/airflow_breeze/utils/path_utils.py | 1 +
dev/breeze/tests/test_provider_documentation.py | 30 ++++++++++++++++----
4 files changed, 56 insertions(+), 15 deletions(-)
diff --git
a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py
b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py
index dc14ea6d08d..fc2d55f86e2 100644
--- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py
+++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py
@@ -49,6 +49,7 @@ from airflow_breeze.utils.packages import (
refresh_provider_metadata_with_provider_id,
render_template,
)
+from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT
from airflow_breeze.utils.run_utils import run_command
from airflow_breeze.utils.shared_options import get_verbose
from airflow_breeze.utils.versions import get_version_tag
@@ -186,11 +187,14 @@ TYPE_OF_CHANGE_DESCRIPTION = {
}
-def _get_git_log_command(from_commit: str | None = None, to_commit: str | None
= None) -> list[str]:
+def _get_git_log_command(
+ folder_paths: list[Path] | None = None, from_commit: str | None = None,
to_commit: str | None = None
+) -> list[str]:
"""Get git command to run for the current repo from the current folder.
The current directory should always be the package folder.
+ :param folder_paths: list of folder paths to check for changes
:param from_commit: if present - base commit from which to start the log
from
:param to_commit: if present - final commit which should be the start of
the log
:return: git command to run
@@ -207,7 +211,8 @@ def _get_git_log_command(from_commit: str | None = None,
to_commit: str | None =
git_cmd.append(from_commit)
elif to_commit:
raise ValueError("It makes no sense to specify to_commit without
from_commit.")
- git_cmd.extend(["--", "."])
+ folders = [folder_path.as_posix() for folder_path in folder_paths] if
folder_paths else ["."]
+ git_cmd.extend(["--", *folders])
return git_cmd
@@ -307,18 +312,24 @@ def _get_all_changes_for_package(
get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}'
exist.")
result = run_command(
["git", "rev-parse", current_tag_no_suffix],
- cwd=provider_details.source_provider_package_path,
+ cwd=AIRFLOW_SOURCES_ROOT,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
+ providers_folder_paths = [
+ provider_details.source_provider_package_path,
+ provider_details.old_source_provider_package_path,
+ ]
if not reapply_templates_only and result.returncode == 0:
if get_verbose():
get_console().print(f"[info]The tag {current_tag_no_suffix}
exists.")
# The tag already exists
result = run_command(
- _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}",
current_tag_no_suffix),
- cwd=provider_details.source_provider_package_path,
+ _get_git_log_command(
+ providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}",
current_tag_no_suffix
+ ),
+ cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
@@ -333,8 +344,10 @@ def _get_all_changes_for_package(
last_doc_only_hash = doc_only_change_file.read_text().strip()
try:
result = run_command(
- _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}",
last_doc_only_hash),
- cwd=provider_details.source_provider_package_path,
+ _get_git_log_command(
+ providers_folder_paths,
f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash
+ ),
+ cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
@@ -387,8 +400,8 @@ def _get_all_changes_for_package(
for version in provider_details.versions[1:]:
version_tag = get_version_tag(version, provider_package_id)
result = run_command(
- _get_git_log_command(next_version_tag, version_tag),
- cwd=provider_details.source_provider_package_path,
+ _get_git_log_command(providers_folder_paths, next_version_tag,
version_tag),
+ cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
@@ -402,7 +415,7 @@ def _get_all_changes_for_package(
next_version_tag = version_tag
current_version = version
result = run_command(
- _get_git_log_command(next_version_tag),
+ _get_git_log_command(providers_folder_paths, next_version_tag),
cwd=provider_details.source_provider_package_path,
capture_output=True,
text=True,
diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py
b/dev/breeze/src/airflow_breeze/utils/packages.py
index 5ee2c2a3edf..9f1eae36938 100644
--- a/dev/breeze/src/airflow_breeze/utils/packages.py
+++ b/dev/breeze/src/airflow_breeze/utils/packages.py
@@ -36,6 +36,7 @@ from airflow_breeze.global_constants import (
)
from airflow_breeze.utils.console import get_console
from airflow_breeze.utils.path_utils import (
+ AIRFLOW_OLD_PROVIDERS_DIR,
AIRFLOW_PROVIDERS_NS_PACKAGE,
BREEZE_SOURCES_ROOT,
DOCS_ROOT,
@@ -75,6 +76,7 @@ class ProviderPackageDetails(NamedTuple):
full_package_name: str
pypi_package_name: str
source_provider_package_path: Path
+ old_source_provider_package_path: Path
documentation_provider_package_path: Path
changelog_path: Path
provider_description: str
@@ -385,6 +387,10 @@ def get_source_package_path(provider_id: str) -> Path:
return AIRFLOW_PROVIDERS_NS_PACKAGE.joinpath(*provider_id.split("."))
+def get_old_source_package_path(provider_id: str) -> Path:
+ return AIRFLOW_OLD_PROVIDERS_DIR.joinpath(*provider_id.split("."))
+
+
def get_documentation_package_path(provider_id: str) -> Path:
return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.',
'-')}"
@@ -515,6 +521,7 @@ def get_provider_details(provider_id: str) ->
ProviderPackageDetails:
full_package_name=f"airflow.providers.{provider_id}",
pypi_package_name=f"apache-airflow-providers-{provider_id.replace('.',
'-')}",
source_provider_package_path=get_source_package_path(provider_id),
+
old_source_provider_package_path=get_old_source_package_path(provider_id),
documentation_provider_package_path=get_documentation_package_path(provider_id),
changelog_path=get_source_package_path(provider_id) / "CHANGELOG.rst",
provider_description=provider_info["description"],
diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py
b/dev/breeze/src/airflow_breeze/utils/path_utils.py
index 4e510cb7680..0feba56356b 100644
--- a/dev/breeze/src/airflow_breeze/utils/path_utils.py
+++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py
@@ -281,6 +281,7 @@ def find_airflow_sources_root_to_operate_on() -> Path:
AIRFLOW_SOURCES_ROOT = find_airflow_sources_root_to_operate_on().resolve()
AIRFLOW_WWW_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "www"
AIRFLOW_UI_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "ui"
+AIRFLOW_OLD_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "providers"
AIRFLOW_PROVIDERS_PROJECT = AIRFLOW_SOURCES_ROOT / "providers"
AIRFLOW_PROVIDERS_SRC = AIRFLOW_PROVIDERS_PROJECT / "src"
AIRFLOW_PROVIDERS_NS_PACKAGE = AIRFLOW_PROVIDERS_SRC / "airflow" / "providers"
diff --git a/dev/breeze/tests/test_provider_documentation.py
b/dev/breeze/tests/test_provider_documentation.py
index db770b7856a..0a56e158bcf 100644
--- a/dev/breeze/tests/test_provider_documentation.py
+++ b/dev/breeze/tests/test_provider_documentation.py
@@ -18,6 +18,7 @@ from __future__ import annotations
import random
import string
+from pathlib import Path
import pytest
@@ -97,28 +98,47 @@ def test_get_version_tag(version: str, provider_id: str,
suffix: str, tag: str):
@pytest.mark.parametrize(
- "from_commit, to_commit, git_command",
+ "folder_paths, from_commit, to_commit, git_command",
[
- (None, None, ["git", "log", "--pretty=format:%H %h %cd %s",
"--date=short", "--", "."]),
+ (None, None, None, ["git", "log", "--pretty=format:%H %h %cd %s",
"--date=short", "--", "."]),
(
+ None,
"from_tag",
None,
["git", "log", "--pretty=format:%H %h %cd %s", "--date=short",
"from_tag", "--", "."],
),
(
+ None,
"from_tag",
"to_tag",
["git", "log", "--pretty=format:%H %h %cd %s", "--date=short",
"from_tag...to_tag", "--", "."],
),
+ (
+ [Path("a"), Path("b")],
+ "from_tag",
+ "to_tag",
+ [
+ "git",
+ "log",
+ "--pretty=format:%H %h %cd %s",
+ "--date=short",
+ "from_tag...to_tag",
+ "--",
+ "a",
+ "b",
+ ],
+ ),
],
)
-def test_get_git_log_command(from_commit: str | None, to_commit: str | None,
git_command: list[str]):
- assert _get_git_log_command(from_commit, to_commit) == git_command
+def test_get_git_log_command(
+ folder_paths: list[str] | None, from_commit: str | None, to_commit: str |
None, git_command: list[str]
+):
+ assert _get_git_log_command(folder_paths, from_commit, to_commit) ==
git_command
def test_get_git_log_command_wrong():
with pytest.raises(ValueError, match=r"to_commit without from_commit"):
- _get_git_log_command(None, "to_commit")
+ _get_git_log_command(None, None, "to_commit")
@pytest.mark.parametrize(