Dev-iL commented on code in PR #60280:
URL: https://github.com/apache/airflow/pull/60280#discussion_r2681818192
##########
providers/git/src/airflow/providers/git/bundles/git.py:
##########
@@ -27,13 +27,63 @@
from git.exc import BadName, GitCommandError, InvalidGitRepositoryError,
NoSuchPathError
from tenacity import retry, retry_if_exception_type, stop_after_attempt
-from airflow.dag_processing.bundles.base import BaseDagBundle
+from airflow.dag_processing.bundles.base import BaseDagBundle,
get_bundle_permissions
from airflow.providers.common.compat.sdk import AirflowException
from airflow.providers.git.hooks.git import GitHook
log = structlog.get_logger(__name__)
+def _apply_permissions_recursively(path: Path) -> None:
+ """
+ Apply configured bundle permissions to a directory tree.
+
+ This ensures that when user impersonation is used, the impersonated user
+ can access the cloned repository files. Permissions are applied at clone
+ time regardless of whether all or only some tasks use run_as_user, because:
+ 1. DAG parsing needs access before task execution
+ 2. Bundles may serve multiple DAGs with different impersonation settings
+ 3. Applying permissions upfront provides a consistent security model
+
+ :param path: The root path to apply permissions to recursively
+ """
+ folder_perms, file_perms = get_bundle_permissions()
+ # While individual chmod failures are suppressed, os.walk errors are more
serious - so they're raised.
+ for root, dirs, files in os.walk(path):
+ root_path = Path(root)
+ with suppress(OSError):
+ root_path.chmod(folder_perms)
+ for d in dirs:
+ with suppress(OSError):
+ (root_path / d).chmod(folder_perms)
+ for f in files:
+ with suppress(OSError):
+ (root_path / f).chmod(file_perms)
+
+
+def _configure_git_safe_directory(path: Path) -> None:
+ """
+ Add path to git safe.directory to allow cross-user access.
+
+ Git 2.35.2+ refuses to operate on repositories owned by different users
+ without explicit safe directory configuration. This is needed when using
+ user impersonation (run_as_user) where the repository is created by one
+ user but accessed by another.
+
+ :param path: The repository path to add as a safe directory
+ """
+ import subprocess
+
+ try:
+ subprocess.run(
+ ["git", "config", "--global", "--add", "safe.directory",
str(path)],
Review Comment:
- Why global: I believe repository-local config is ignored for security
reasons (otherwise malicious repos could whitelist themselves). Airflow
typically runs in controlled environments (containers/VMs), modifying the
global git config for the airflow user should be acceptable.
- Why not GitPython:
1. We need to configure `safe.directory` before creating the `Repo` object
(L234), because `GitPython` internally runs git commands during `Repo`
initialization. If the ownership check fails at that point, we get an exception
before we can configure anything.
2. While `GitPython` has `GitConfigParser` that can write to any config
file, using it for `~/.gitconfig` would essentially reimplement `git config
--global` with more code.
##########
providers/git/src/airflow/providers/git/bundles/git.py:
##########
@@ -27,13 +27,63 @@
from git.exc import BadName, GitCommandError, InvalidGitRepositoryError,
NoSuchPathError
from tenacity import retry, retry_if_exception_type, stop_after_attempt
-from airflow.dag_processing.bundles.base import BaseDagBundle
+from airflow.dag_processing.bundles.base import BaseDagBundle,
get_bundle_permissions
from airflow.providers.common.compat.sdk import AirflowException
from airflow.providers.git.hooks.git import GitHook
log = structlog.get_logger(__name__)
+def _apply_permissions_recursively(path: Path) -> None:
+ """
+ Apply configured bundle permissions to a directory tree.
+
+ This ensures that when user impersonation is used, the impersonated user
+ can access the cloned repository files. Permissions are applied at clone
+ time regardless of whether all or only some tasks use run_as_user, because:
+ 1. DAG parsing needs access before task execution
+ 2. Bundles may serve multiple DAGs with different impersonation settings
+ 3. Applying permissions upfront provides a consistent security model
+
+ :param path: The root path to apply permissions to recursively
+ """
+ folder_perms, file_perms = get_bundle_permissions()
+ # While individual chmod failures are suppressed, os.walk errors are more
serious - so they're raised.
+ for root, dirs, files in os.walk(path):
+ root_path = Path(root)
+ with suppress(OSError):
+ root_path.chmod(folder_perms)
+ for d in dirs:
+ with suppress(OSError):
+ (root_path / d).chmod(folder_perms)
+ for f in files:
+ with suppress(OSError):
+ (root_path / f).chmod(file_perms)
+
+
+def _configure_git_safe_directory(path: Path) -> None:
+ """
+ Add path to git safe.directory to allow cross-user access.
+
+ Git 2.35.2+ refuses to operate on repositories owned by different users
+ without explicit safe directory configuration. This is needed when using
+ user impersonation (run_as_user) where the repository is created by one
+ user but accessed by another.
+
+ :param path: The repository path to add as a safe directory
+ """
+ import subprocess
Review Comment:
Will move.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]