Copilot commented on code in PR #64103:
URL: https://github.com/apache/airflow/pull/64103#discussion_r3025333028


##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot 
be provided at the same time")
         self._process_git_auth_url()
+        self._setup_askpass()
 
     def _build_ssh_command(self, key_path: str) -> str:
         return (
             f"ssh -i {key_path} "
             f"-o IdentitiesOnly=yes "
             f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
         )
-
+    
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{self.user_name}:{self.auth_token}@";)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{self.user_name}:{self.auth_token}@";)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            self.repo_url = self.repo_url
-        elif not self.repo_url.startswith("git@") or not 
self.repo_url.startswith("https://";):
+
+        if self.repo_url.startswith(("http://";, "https://";)):
+            if self.auth_token:
+                log.info(
+                    "Using token-based authentication via GIT_ASKPASS. "
+                    "Credentials are not embedded in the repository URL."
+                )
+            return
+
+        if not self.repo_url.startswith("git@"):
             self.repo_url = os.path.expanduser(self.repo_url)
 
+    def _setup_askpass(self):
+        if not self.auth_token:
+            return
+
+        script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""

Review Comment:
   The askpass script interpolates user_name/auth_token directly into a shell 
script. If either value contains quotes/newlines/shell metacharacters, the 
script can break (or behave unexpectedly). Consider avoiding embedding secrets 
in the script body: have the script print values from environment variables 
using printf (or otherwise properly escape) so credentials are handled safely.



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot 
be provided at the same time")
         self._process_git_auth_url()
+        self._setup_askpass()
 
     def _build_ssh_command(self, key_path: str) -> str:
         return (
             f"ssh -i {key_path} "
             f"-o IdentitiesOnly=yes "
             f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
         )
-
+    
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{self.user_name}:{self.auth_token}@";)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{self.user_name}:{self.auth_token}@";)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            self.repo_url = self.repo_url
-        elif not self.repo_url.startswith("git@") or not 
self.repo_url.startswith("https://";):
+
+        if self.repo_url.startswith(("http://";, "https://";)):
+            if self.auth_token:
+                log.info(
+                    "Using token-based authentication via GIT_ASKPASS. "
+                    "Credentials are not embedded in the repository URL."
+                )
+            return
+
+        if not self.repo_url.startswith("git@"):
             self.repo_url = os.path.expanduser(self.repo_url)
 
+    def _setup_askpass(self):
+        if not self.auth_token:
+            return
+
+        script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""
+
+        tmp = tempfile.NamedTemporaryFile(delete=False, mode="w")
+        tmp.write(script)
+        tmp.flush()
+        os.chmod(tmp.name, 0o700)
+
+        self.env["GIT_ASKPASS"] = tmp.name
+        self.env["GIT_TERMINAL_PROMPT"] = "0" 

Review Comment:
   The GIT_ASKPASS helper is created with delete=False and never removed, 
leaving a plaintext token on disk indefinitely. Also, the NamedTemporaryFile 
handle isn’t closed. Please create the askpass script within a context manager 
that guarantees cleanup (e.g., via configure_hook_env with a try/finally that 
unlinks the file) and close the file handle after writing.



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot 
be provided at the same time")
         self._process_git_auth_url()
+        self._setup_askpass()
 
     def _build_ssh_command(self, key_path: str) -> str:
         return (
             f"ssh -i {key_path} "
             f"-o IdentitiesOnly=yes "
             f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
         )
-
+    
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{self.user_name}:{self.auth_token}@";)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{self.user_name}:{self.auth_token}@";)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            self.repo_url = self.repo_url
-        elif not self.repo_url.startswith("git@") or not 
self.repo_url.startswith("https://";):
+
+        if self.repo_url.startswith(("http://";, "https://";)):
+            if self.auth_token:
+                log.info(
+                    "Using token-based authentication via GIT_ASKPASS. "
+                    "Credentials are not embedded in the repository URL."
+                )
+            return

Review Comment:
   This change removes credential embedding from repo_url, but existing unit 
tests still expect URLs like https://user:<token>@... (see 
providers/git/tests/unit/git/hooks/test_git.py:test_correct_repo_urls). Please 
update/add tests to assert the URL remains unmodified and that askpass env vars 
are set/cleaned up appropriately.



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot 
be provided at the same time")
         self._process_git_auth_url()
+        self._setup_askpass()
 
     def _build_ssh_command(self, key_path: str) -> str:
         return (
             f"ssh -i {key_path} "
             f"-o IdentitiesOnly=yes "
             f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
         )
-
+    
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{self.user_name}:{self.auth_token}@";)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{self.user_name}:{self.auth_token}@";)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            self.repo_url = self.repo_url
-        elif not self.repo_url.startswith("git@") or not 
self.repo_url.startswith("https://";):
+
+        if self.repo_url.startswith(("http://";, "https://";)):
+            if self.auth_token:
+                log.info(
+                    "Using token-based authentication via GIT_ASKPASS. "
+                    "Credentials are not embedded in the repository URL."
+                )
+            return
+
+        if not self.repo_url.startswith("git@"):
             self.repo_url = os.path.expanduser(self.repo_url)
 
+    def _setup_askpass(self):
+        if not self.auth_token:
+            return
+
+        script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""
+
+        tmp = tempfile.NamedTemporaryFile(delete=False, mode="w")
+        tmp.write(script)
+        tmp.flush()
+        os.chmod(tmp.name, 0o700)
+
+        self.env["GIT_ASKPASS"] = tmp.name
+        self.env["GIT_TERMINAL_PROMPT"] = "0" 

Review Comment:
   Trailing whitespace at end of line will be flagged by ruff (W291). Please 
remove the extra space after the string literal.
   ```suggestion
           self.env["GIT_TERMINAL_PROMPT"] = "0"
   ```



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot 
be provided at the same time")
         self._process_git_auth_url()
+        self._setup_askpass()
 
     def _build_ssh_command(self, key_path: str) -> str:
         return (
             f"ssh -i {key_path} "
             f"-o IdentitiesOnly=yes "
             f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
         )
-
+    
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{self.user_name}:{self.auth_token}@";)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{self.user_name}:{self.auth_token}@";)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            self.repo_url = self.repo_url
-        elif not self.repo_url.startswith("git@") or not 
self.repo_url.startswith("https://";):
+
+        if self.repo_url.startswith(("http://";, "https://";)):
+            if self.auth_token:
+                log.info(
+                    "Using token-based authentication via GIT_ASKPASS. "
+                    "Credentials are not embedded in the repository URL."
+                )
+            return

Review Comment:
   _process_git_auth_url returns early for http(s) URLs; when no auth_token is 
set, GIT_TERMINAL_PROMPT is not disabled. This can cause git commands to block 
waiting for interactive credentials and also doesn’t match the PR description 
claim that prompts are disabled. Consider setting GIT_TERMINAL_PROMPT=0 for all 
http(s) URLs (not only when a token is configured).



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot 
be provided at the same time")
         self._process_git_auth_url()
+        self._setup_askpass()
 
     def _build_ssh_command(self, key_path: str) -> str:
         return (
             f"ssh -i {key_path} "
             f"-o IdentitiesOnly=yes "
             f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
         )
-
+    

Review Comment:
   There is trailing whitespace on this blank line, which will be flagged by 
ruff (W293). Please remove the whitespace.
   ```suggestion
   
   ```



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -77,27 +77,50 @@ def __init__(
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot 
be provided at the same time")
         self._process_git_auth_url()
+        self._setup_askpass()
 
     def _build_ssh_command(self, key_path: str) -> str:
         return (
             f"ssh -i {key_path} "
             f"-o IdentitiesOnly=yes "
             f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
         )
-
+    
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{self.user_name}:{self.auth_token}@";)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{self.user_name}:{self.auth_token}@";)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            self.repo_url = self.repo_url
-        elif not self.repo_url.startswith("git@") or not 
self.repo_url.startswith("https://";):
+
+        if self.repo_url.startswith(("http://";, "https://";)):
+            if self.auth_token:
+                log.info(
+                    "Using token-based authentication via GIT_ASKPASS. "
+                    "Credentials are not embedded in the repository URL."
+                )
+            return
+
+        if not self.repo_url.startswith("git@"):
             self.repo_url = os.path.expanduser(self.repo_url)
 
+    def _setup_askpass(self):
+        if not self.auth_token:
+            return
+
+        script = f"""#!/bin/sh
+case "$1" in
+Username*) echo "{self.user_name}" ;;
+Password*) echo "{self.auth_token}" ;;
+esac
+"""
+
+        tmp = tempfile.NamedTemporaryFile(delete=False, mode="w")
+        tmp.write(script)
+        tmp.flush()
+        os.chmod(tmp.name, 0o700)
+
+        self.env["GIT_ASKPASS"] = tmp.name
+        self.env["GIT_TERMINAL_PROMPT"] = "0" 

Review Comment:
   Setting GIT_ASKPASS/GIT_TERMINAL_PROMPT into self.env here only helps 
callers that explicitly propagate hook.env into every git invocation. In the 
current codebase, some git operations use custom_environment with only 
GIT_SSH_COMMAND (e.g., GitDagBundle._fetch_bare_repo in 
providers/git/src/airflow/providers/git/bundles/git.py), so HTTPS token auth 
may work for the initial clone but fail on subsequent fetches/submodule 
operations. Consider providing a context manager/helper that applies the full 
env (including askpass) for all git commands executed by the bundle.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to