This is an automated email from the ASF dual-hosted git repository.
ephraimanierobi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 3fe68e79b06 Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True (#61847)
3fe68e79b06 is described below
commit 3fe68e79b0651959c4ddfbd4a06c101831dcff80
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Mon Feb 16 08:16:46 2026 +0100
Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True (#61847)
* Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True
When a version directory already existed without a .git folder, workers
treated it as invalid, deleted it, and then re-cloned. Tasks using the same
bundle version would trigger a full clone on every run.
Detect an existing pruned worktree and reuse it instead of cloning again.
* fixup! Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True
---
.../git/src/airflow/providers/git/bundles/git.py | 24 ++++++++++++-
providers/git/tests/unit/git/bundles/test_git.py | 42 ++++++++++++++++++++++
2 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/providers/git/src/airflow/providers/git/bundles/git.py
b/providers/git/src/airflow/providers/git/bundles/git.py
index 1f0b0c56f6a..9b3e873cd4c 100644
--- a/providers/git/src/airflow/providers/git/bundles/git.py
+++ b/providers/git/src/airflow/providers/git/bundles/git.py
@@ -115,8 +115,25 @@ class GitDagBundle(BaseDagBundle):
self.repo_url = self.hook.repo_url
self._log.debug("repo_url updated from hook")
+ def _is_pruned_worktree(self) -> bool:
+ # True if version path exists and has no .git
+ if not self.version:
+ return False
+ if not self.repo_path.exists() or not self.repo_path.is_dir():
+ return False
+ return not (self.repo_path / ".git").exists()
+
def _initialize(self):
with self.lock():
+ # Avoids re-cloning on every task run when prune_dotgit_folder=True.
+ if self._is_pruned_worktree():
+ self._log.debug(
+ "Using existing pruned worktree",
+ repo_path=self.repo_path,
+ version=self.version,
+ )
+ return
+
cm = self.hook.configure_hook_env() if self.hook else nullcontext()
with cm:
try:
@@ -151,11 +168,14 @@ class GitDagBundle(BaseDagBundle):
raise RuntimeError("Error pulling submodule from
repository") from e
if self.prune_dotgit_folder:
+ self.repo.close()
shutil.rmtree(self.repo_path / ".git")
+ self.repo = None
else:
self.refresh()
- self.repo.close()
+ if self.repo is not None:
+ self.repo.close()
def initialize(self) -> None:
if not self.repo_url:
@@ -246,6 +266,8 @@ class GitDagBundle(BaseDagBundle):
)
def get_current_version(self) -> str:
+ if self.version is not None and getattr(self, "repo", None) is None:
+ return self.version
with self.repo as repo:
return repo.head.commit.hexsha
diff --git a/providers/git/tests/unit/git/bundles/test_git.py
b/providers/git/tests/unit/git/bundles/test_git.py
index 5b64b3db33d..ade0c306755 100644
--- a/providers/git/tests/unit/git/bundles/test_git.py
+++ b/providers/git/tests/unit/git/bundles/test_git.py
@@ -67,6 +67,9 @@ def git_repo(tmp_path_factory):
def assert_repo_is_closed(bundle: GitDagBundle):
+ # When .git was pruned, repo is cleared and there is nothing to close
+ if getattr(bundle, "repo", None) is None:
+ return
# cat-file processes get left around if the repo is not closed, so check it was
assert bundle.repo.git.cat_file_all is None
assert bundle.bare_repo.git.cat_file_all is None
@@ -266,6 +269,45 @@ class TestGitDagBundle:
assert_repo_is_closed(bundle)
+ @mock.patch("airflow.providers.git.bundles.git.GitHook")
+ def test_second_initialize_reuses_pruned_worktree_without_recloning(self,
mock_githook, git_repo):
+ """When version path exists without .git (pruned), second initialize()
uses it and does not re-clone."""
+ repo_path, repo = git_repo
+ mock_githook.return_value.repo_url = repo_path
+ starting_commit = repo.head.commit
+ version = starting_commit.hexsha
+ bundle_name = "test_pruned_reuse"
+
+ # First init: clone and prune (default)
+ bundle1 = GitDagBundle(
+ name=bundle_name,
+ git_conn_id=CONN_HTTPS,
+ version=version,
+ tracking_ref=GIT_DEFAULT_BRANCH,
+ prune_dotgit_folder=True,
+ )
+ bundle1.initialize()
+ assert not (bundle1.repo_path / ".git").exists()
+ assert bundle1.get_current_version() == version
+ version_path = bundle1.repo_path
+
+ # Second init: same name and version; should detect pruned worktree
and skip clone
+ with patch.object(GitDagBundle, "_clone_repo_if_required") as
mock_clone:
+ bundle2 = GitDagBundle(
+ name=bundle_name,
+ git_conn_id=CONN_HTTPS,
+ version=version,
+ tracking_ref=GIT_DEFAULT_BRANCH,
+ prune_dotgit_folder=True,
+ )
+ bundle2.initialize()
+ mock_clone.assert_not_called()
+
+ assert bundle2.repo_path == version_path
+ assert bundle2.get_current_version() == version
+ files_in_repo = {f.name for f in bundle2.path.iterdir() if f.is_file()}
+ assert {"test_dag.py"} == files_in_repo
+
@pytest.mark.parametrize(
"amend",
[