Copilot commented on code in PR #64103:
URL: https://github.com/apache/airflow/pull/64103#discussion_r3025333028
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
+ self._setup_askpass()
def _build_ssh_command(self, key_path: str) -> str:
return (
f"ssh -i {key_path} "
f"-o IdentitiesOnly=yes "
f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
)
-
+
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- self.repo_url = self.repo_url.replace("https://",
f"https://{self.user_name}:{self.auth_token}@")
- elif self.auth_token and self.repo_url.startswith("http://"):
- self.repo_url = self.repo_url.replace("http://",
f"http://{self.user_name}:{self.auth_token}@")
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- self.repo_url = self.repo_url
- elif not self.repo_url.startswith("git@") or not
self.repo_url.startswith("https://"):
+
+ if self.repo_url.startswith(("http://", "https://")):
+ if self.auth_token:
+ log.info(
+ "Using token-based authentication via GIT_ASKPASS. "
+ "Credentials are not embedded in the repository URL."
+ )
+ return
+
+ if not self.repo_url.startswith("git@"):
self.repo_url = os.path.expanduser(self.repo_url)
+ def _setup_askpass(self):
+ if not self.auth_token:
+ return
+
+ script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""
Review Comment:
The askpass script interpolates user_name/auth_token directly into a shell
script. If either value contains quotes/newlines/shell metacharacters, the
script can break (or behave unexpectedly). Consider avoiding embedding secrets
in the script body: have the script print values from environment variables
using printf (or otherwise properly escape) so credentials are handled safely.
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
+ self._setup_askpass()
def _build_ssh_command(self, key_path: str) -> str:
return (
f"ssh -i {key_path} "
f"-o IdentitiesOnly=yes "
f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
)
-
+
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- self.repo_url = self.repo_url.replace("https://",
f"https://{self.user_name}:{self.auth_token}@")
- elif self.auth_token and self.repo_url.startswith("http://"):
- self.repo_url = self.repo_url.replace("http://",
f"http://{self.user_name}:{self.auth_token}@")
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- self.repo_url = self.repo_url
- elif not self.repo_url.startswith("git@") or not
self.repo_url.startswith("https://"):
+
+ if self.repo_url.startswith(("http://", "https://")):
+ if self.auth_token:
+ log.info(
+ "Using token-based authentication via GIT_ASKPASS. "
+ "Credentials are not embedded in the repository URL."
+ )
+ return
+
+ if not self.repo_url.startswith("git@"):
self.repo_url = os.path.expanduser(self.repo_url)
+ def _setup_askpass(self):
+ if not self.auth_token:
+ return
+
+ script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""
+
+ tmp = tempfile.NamedTemporaryFile(delete=False, mode="w")
+ tmp.write(script)
+ tmp.flush()
+ os.chmod(tmp.name, 0o700)
+
+ self.env["GIT_ASKPASS"] = tmp.name
+ self.env["GIT_TERMINAL_PROMPT"] = "0"
Review Comment:
The GIT_ASKPASS helper is created with delete=False and never removed,
leaving a plaintext token on disk indefinitely. Also, the NamedTemporaryFile
handle isn’t closed. Please create the askpass script within a context manager
that guarantees cleanup (e.g., via configure_hook_env with a try/finally that
unlinks the file) and close the file handle after writing.
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
+ self._setup_askpass()
def _build_ssh_command(self, key_path: str) -> str:
return (
f"ssh -i {key_path} "
f"-o IdentitiesOnly=yes "
f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
)
-
+
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- self.repo_url = self.repo_url.replace("https://",
f"https://{self.user_name}:{self.auth_token}@")
- elif self.auth_token and self.repo_url.startswith("http://"):
- self.repo_url = self.repo_url.replace("http://",
f"http://{self.user_name}:{self.auth_token}@")
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- self.repo_url = self.repo_url
- elif not self.repo_url.startswith("git@") or not
self.repo_url.startswith("https://"):
+
+ if self.repo_url.startswith(("http://", "https://")):
+ if self.auth_token:
+ log.info(
+ "Using token-based authentication via GIT_ASKPASS. "
+ "Credentials are not embedded in the repository URL."
+ )
+ return
Review Comment:
This change removes credential embedding from repo_url, but existing unit
tests still expect URLs like https://user:<token>@... (see
providers/git/tests/unit/git/hooks/test_git.py:test_correct_repo_urls). Please
update/add tests to assert the URL remains unmodified and that askpass env vars
are set/cleaned up appropriately.
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
+ self._setup_askpass()
def _build_ssh_command(self, key_path: str) -> str:
return (
f"ssh -i {key_path} "
f"-o IdentitiesOnly=yes "
f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
)
-
+
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- self.repo_url = self.repo_url.replace("https://",
f"https://{self.user_name}:{self.auth_token}@")
- elif self.auth_token and self.repo_url.startswith("http://"):
- self.repo_url = self.repo_url.replace("http://",
f"http://{self.user_name}:{self.auth_token}@")
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- self.repo_url = self.repo_url
- elif not self.repo_url.startswith("git@") or not
self.repo_url.startswith("https://"):
+
+ if self.repo_url.startswith(("http://", "https://")):
+ if self.auth_token:
+ log.info(
+ "Using token-based authentication via GIT_ASKPASS. "
+ "Credentials are not embedded in the repository URL."
+ )
+ return
+
+ if not self.repo_url.startswith("git@"):
self.repo_url = os.path.expanduser(self.repo_url)
+ def _setup_askpass(self):
+ if not self.auth_token:
+ return
+
+ script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""
+
+ tmp = tempfile.NamedTemporaryFile(delete=False, mode="w")
+ tmp.write(script)
+ tmp.flush()
+ os.chmod(tmp.name, 0o700)
+
+ self.env["GIT_ASKPASS"] = tmp.name
+ self.env["GIT_TERMINAL_PROMPT"] = "0"
Review Comment:
Trailing whitespace at end of line will be flagged by ruff (W291). Please
remove the extra space after the string literal.
```suggestion
self.env["GIT_TERMINAL_PROMPT"] = "0"
```
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
+ self._setup_askpass()
def _build_ssh_command(self, key_path: str) -> str:
return (
f"ssh -i {key_path} "
f"-o IdentitiesOnly=yes "
f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
)
-
+
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- self.repo_url = self.repo_url.replace("https://",
f"https://{self.user_name}:{self.auth_token}@")
- elif self.auth_token and self.repo_url.startswith("http://"):
- self.repo_url = self.repo_url.replace("http://",
f"http://{self.user_name}:{self.auth_token}@")
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- self.repo_url = self.repo_url
- elif not self.repo_url.startswith("git@") or not
self.repo_url.startswith("https://"):
+
+ if self.repo_url.startswith(("http://", "https://")):
+ if self.auth_token:
+ log.info(
+ "Using token-based authentication via GIT_ASKPASS. "
+ "Credentials are not embedded in the repository URL."
+ )
+ return
Review Comment:
_process_git_auth_url returns early for http(s) URLs; when no auth_token is
set, GIT_TERMINAL_PROMPT is not disabled. This can cause git commands to block
waiting for interactive credentials and also doesn’t match the PR description
claim that prompts are disabled. Consider setting GIT_TERMINAL_PROMPT=0 for all
http(s) URLs (not only when a token is configured).
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
+ self._setup_askpass()
def _build_ssh_command(self, key_path: str) -> str:
return (
f"ssh -i {key_path} "
f"-o IdentitiesOnly=yes "
f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
)
-
+
Review Comment:
There is trailing whitespace on this blank line, which will be flagged by
ruff (W293). Please remove the whitespace.
```suggestion
```
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
+ self._setup_askpass()
def _build_ssh_command(self, key_path: str) -> str:
return (
f"ssh -i {key_path} "
f"-o IdentitiesOnly=yes "
f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
)
-
+
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- self.repo_url = self.repo_url.replace("https://",
f"https://{self.user_name}:{self.auth_token}@")
- elif self.auth_token and self.repo_url.startswith("http://"):
- self.repo_url = self.repo_url.replace("http://",
f"http://{self.user_name}:{self.auth_token}@")
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- self.repo_url = self.repo_url
- elif not self.repo_url.startswith("git@") or not
self.repo_url.startswith("https://"):
+
+ if self.repo_url.startswith(("http://", "https://")):
+ if self.auth_token:
+ log.info(
+ "Using token-based authentication via GIT_ASKPASS. "
+ "Credentials are not embedded in the repository URL."
+ )
+ return
+
+ if not self.repo_url.startswith("git@"):
self.repo_url = os.path.expanduser(self.repo_url)
+ def _setup_askpass(self):
+ if not self.auth_token:
+ return
+
+ script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""
+
+ tmp = tempfile.NamedTemporaryFile(delete=False, mode="w")
+ tmp.write(script)
+ tmp.flush()
+ os.chmod(tmp.name, 0o700)
+
+ self.env["GIT_ASKPASS"] = tmp.name
+ self.env["GIT_TERMINAL_PROMPT"] = "0"
Review Comment:
Setting GIT_ASKPASS/GIT_TERMINAL_PROMPT into self.env here only helps
callers that explicitly propagate hook.env into every git invocation. In the
current codebase, some git operations use custom_environment with only
GIT_SSH_COMMAND (e.g., GitDagBundle._fetch_bare_repo in
providers/git/src/airflow/providers/git/bundles/git.py), so HTTPS token auth
may work for the initial clone but fail on subsequent fetches/submodule
operations. Consider providing a context manager/helper that applies the full
env (including askpass) for all git commands executed by the bundle.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]