This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4e3c96f58a4 Fix generation of commit history after moving providers 
(#43412)
4e3c96f58a4 is described below

commit 4e3c96f58a4aee6a5151b9bc4bd55768e7fc0a54
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sun Oct 27 12:39:52 2024 +0100

    Fix generation of commit history after moving providers (#43412)
    
    Another teething problem after moving providers in #42505. After
    the move, the git history of the current "providers" folder only
    contains commits made after the move - it does not include commits
    from before the move. Since we always regenerate the full list of
    commits, the pre-move commits were missing from it.
    
    We cannot use `--follow` - because `git log --follow` only
    works for single files, not directories, but since the move
    was very predictable ("airflow/providers/nnn" ->
    "providers/src/airflow/providers/nnn") we can add the old
    path to the `git log` command to get both pre-move and post-move
    commit history.
---
 .../prepare_providers/provider_documentation.py    | 33 +++++++++++++++-------
 dev/breeze/src/airflow_breeze/utils/packages.py    |  7 +++++
 dev/breeze/src/airflow_breeze/utils/path_utils.py  |  1 +
 dev/breeze/tests/test_provider_documentation.py    | 30 ++++++++++++++++----
 4 files changed, 56 insertions(+), 15 deletions(-)

diff --git 
a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py 
b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py
index dc14ea6d08d..fc2d55f86e2 100644
--- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py
+++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py
@@ -49,6 +49,7 @@ from airflow_breeze.utils.packages import (
     refresh_provider_metadata_with_provider_id,
     render_template,
 )
+from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT
 from airflow_breeze.utils.run_utils import run_command
 from airflow_breeze.utils.shared_options import get_verbose
 from airflow_breeze.utils.versions import get_version_tag
@@ -186,11 +187,14 @@ TYPE_OF_CHANGE_DESCRIPTION = {
 }
 
 
-def _get_git_log_command(from_commit: str | None = None, to_commit: str | None 
= None) -> list[str]:
+def _get_git_log_command(
+    folder_paths: list[Path] | None = None, from_commit: str | None = None, 
to_commit: str | None = None
+) -> list[str]:
     """Get git command to run for the current repo from the current folder.
 
     The current directory should always be the package folder.
 
+    :param folder_paths: list of folder paths to check for changes
     :param from_commit: if present - base commit from which to start the log 
from
     :param to_commit: if present - final commit which should be the start of 
the log
     :return: git command to run
@@ -207,7 +211,8 @@ def _get_git_log_command(from_commit: str | None = None, 
to_commit: str | None =
         git_cmd.append(from_commit)
     elif to_commit:
         raise ValueError("It makes no sense to specify to_commit without 
from_commit.")
-    git_cmd.extend(["--", "."])
+    folders = [folder_path.as_posix() for folder_path in folder_paths] if 
folder_paths else ["."]
+    git_cmd.extend(["--", *folders])
     return git_cmd
 
 
@@ -307,18 +312,24 @@ def _get_all_changes_for_package(
         get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}' 
exist.")
     result = run_command(
         ["git", "rev-parse", current_tag_no_suffix],
-        cwd=provider_details.source_provider_package_path,
+        cwd=AIRFLOW_SOURCES_ROOT,
         stdout=subprocess.DEVNULL,
         stderr=subprocess.DEVNULL,
         check=False,
     )
+    providers_folder_paths = [
+        provider_details.source_provider_package_path,
+        provider_details.old_source_provider_package_path,
+    ]
     if not reapply_templates_only and result.returncode == 0:
         if get_verbose():
             get_console().print(f"[info]The tag {current_tag_no_suffix} 
exists.")
         # The tag already exists
         result = run_command(
-            _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", 
current_tag_no_suffix),
-            cwd=provider_details.source_provider_package_path,
+            _get_git_log_command(
+                providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}", 
current_tag_no_suffix
+            ),
+            cwd=AIRFLOW_SOURCES_ROOT,
             capture_output=True,
             text=True,
             check=True,
@@ -333,8 +344,10 @@ def _get_all_changes_for_package(
                 last_doc_only_hash = doc_only_change_file.read_text().strip()
                 try:
                     result = run_command(
-                        _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", 
last_doc_only_hash),
-                        cwd=provider_details.source_provider_package_path,
+                        _get_git_log_command(
+                            providers_folder_paths, 
f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash
+                        ),
+                        cwd=AIRFLOW_SOURCES_ROOT,
                         capture_output=True,
                         text=True,
                         check=True,
@@ -387,8 +400,8 @@ def _get_all_changes_for_package(
     for version in provider_details.versions[1:]:
         version_tag = get_version_tag(version, provider_package_id)
         result = run_command(
-            _get_git_log_command(next_version_tag, version_tag),
-            cwd=provider_details.source_provider_package_path,
+            _get_git_log_command(providers_folder_paths, next_version_tag, 
version_tag),
+            cwd=AIRFLOW_SOURCES_ROOT,
             capture_output=True,
             text=True,
             check=True,
@@ -402,7 +415,7 @@ def _get_all_changes_for_package(
         next_version_tag = version_tag
         current_version = version
     result = run_command(
-        _get_git_log_command(next_version_tag),
+        _get_git_log_command(providers_folder_paths, next_version_tag),
         cwd=provider_details.source_provider_package_path,
         capture_output=True,
         text=True,
diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py 
b/dev/breeze/src/airflow_breeze/utils/packages.py
index 5ee2c2a3edf..9f1eae36938 100644
--- a/dev/breeze/src/airflow_breeze/utils/packages.py
+++ b/dev/breeze/src/airflow_breeze/utils/packages.py
@@ -36,6 +36,7 @@ from airflow_breeze.global_constants import (
 )
 from airflow_breeze.utils.console import get_console
 from airflow_breeze.utils.path_utils import (
+    AIRFLOW_OLD_PROVIDERS_DIR,
     AIRFLOW_PROVIDERS_NS_PACKAGE,
     BREEZE_SOURCES_ROOT,
     DOCS_ROOT,
@@ -75,6 +76,7 @@ class ProviderPackageDetails(NamedTuple):
     full_package_name: str
     pypi_package_name: str
     source_provider_package_path: Path
+    old_source_provider_package_path: Path
     documentation_provider_package_path: Path
     changelog_path: Path
     provider_description: str
@@ -385,6 +387,10 @@ def get_source_package_path(provider_id: str) -> Path:
     return AIRFLOW_PROVIDERS_NS_PACKAGE.joinpath(*provider_id.split("."))
 
 
+def get_old_source_package_path(provider_id: str) -> Path:
+    return AIRFLOW_OLD_PROVIDERS_DIR.joinpath(*provider_id.split("."))
+
+
 def get_documentation_package_path(provider_id: str) -> Path:
     return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.', 
'-')}"
 
@@ -515,6 +521,7 @@ def get_provider_details(provider_id: str) -> 
ProviderPackageDetails:
         full_package_name=f"airflow.providers.{provider_id}",
         pypi_package_name=f"apache-airflow-providers-{provider_id.replace('.', 
'-')}",
         source_provider_package_path=get_source_package_path(provider_id),
+        
old_source_provider_package_path=get_old_source_package_path(provider_id),
         
documentation_provider_package_path=get_documentation_package_path(provider_id),
         changelog_path=get_source_package_path(provider_id) / "CHANGELOG.rst",
         provider_description=provider_info["description"],
diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py 
b/dev/breeze/src/airflow_breeze/utils/path_utils.py
index 4e510cb7680..0feba56356b 100644
--- a/dev/breeze/src/airflow_breeze/utils/path_utils.py
+++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py
@@ -281,6 +281,7 @@ def find_airflow_sources_root_to_operate_on() -> Path:
 AIRFLOW_SOURCES_ROOT = find_airflow_sources_root_to_operate_on().resolve()
 AIRFLOW_WWW_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "www"
 AIRFLOW_UI_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "ui"
+AIRFLOW_OLD_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "providers"
 AIRFLOW_PROVIDERS_PROJECT = AIRFLOW_SOURCES_ROOT / "providers"
 AIRFLOW_PROVIDERS_SRC = AIRFLOW_PROVIDERS_PROJECT / "src"
 AIRFLOW_PROVIDERS_NS_PACKAGE = AIRFLOW_PROVIDERS_SRC / "airflow" / "providers"
diff --git a/dev/breeze/tests/test_provider_documentation.py 
b/dev/breeze/tests/test_provider_documentation.py
index db770b7856a..0a56e158bcf 100644
--- a/dev/breeze/tests/test_provider_documentation.py
+++ b/dev/breeze/tests/test_provider_documentation.py
@@ -18,6 +18,7 @@ from __future__ import annotations
 
 import random
 import string
+from pathlib import Path
 
 import pytest
 
@@ -97,28 +98,47 @@ def test_get_version_tag(version: str, provider_id: str, 
suffix: str, tag: str):
 
 
 @pytest.mark.parametrize(
-    "from_commit, to_commit, git_command",
+    "folder_paths, from_commit, to_commit, git_command",
     [
-        (None, None, ["git", "log", "--pretty=format:%H %h %cd %s", 
"--date=short", "--", "."]),
+        (None, None, None, ["git", "log", "--pretty=format:%H %h %cd %s", 
"--date=short", "--", "."]),
         (
+            None,
             "from_tag",
             None,
             ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", 
"from_tag", "--", "."],
         ),
         (
+            None,
             "from_tag",
             "to_tag",
             ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", 
"from_tag...to_tag", "--", "."],
         ),
+        (
+            [Path("a"), Path("b")],
+            "from_tag",
+            "to_tag",
+            [
+                "git",
+                "log",
+                "--pretty=format:%H %h %cd %s",
+                "--date=short",
+                "from_tag...to_tag",
+                "--",
+                "a",
+                "b",
+            ],
+        ),
     ],
 )
-def test_get_git_log_command(from_commit: str | None, to_commit: str | None, 
git_command: list[str]):
-    assert _get_git_log_command(from_commit, to_commit) == git_command
+def test_get_git_log_command(
+    folder_paths: list[str] | None, from_commit: str | None, to_commit: str | 
None, git_command: list[str]
+):
+    assert _get_git_log_command(folder_paths, from_commit, to_commit) == 
git_command
 
 
 def test_get_git_log_command_wrong():
     with pytest.raises(ValueError, match=r"to_commit without from_commit"):
-        _get_git_log_command(None, "to_commit")
+        _get_git_log_command(None, None, "to_commit")
 
 
 @pytest.mark.parametrize(

Reply via email to