ephraimbuddy commented on code in PR #64105:
URL: https://github.com/apache/airflow/pull/64105#discussion_r3234010433


##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -145,20 +144,48 @@ def _build_ssh_command(self, key_path: str | None = None) 
-> str:
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            encoded_user = urlquote(self.user_name, safe="")
-            encoded_token = urlquote(self.auth_token, safe="")
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{encoded_user}:{encoded_token}@";, 1)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            encoded_user = urlquote(self.user_name, safe="")
-            encoded_token = urlquote(self.auth_token, safe="")
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{encoded_user}:{encoded_token}@";, 1)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            pass
-        elif not self.repo_url.startswith("git@") and not 
self.repo_url.startswith("https://";):
+
+        if not self.repo_url.startswith("git@") and not 
self.repo_url.startswith("https://";):
             self.repo_url = os.path.expanduser(self.repo_url)
 
+    @contextlib.contextmanager
+    def _token_askpass_env(self):
+        if not self.auth_token:
+            yield
+            return
+
+        raw_username = self.user_name or "git"
+        username = shlex.quote(raw_username)
+        password = shlex.quote(self.auth_token)
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".sh", delete=True) 
as askpass_script:
+            script_content = f"""#!/bin/sh
+case "$1" in
+    *Username*) echo {username} ;;
+    *Password*) echo {password} ;;
+    *) exit 1 ;;
+esac
+"""
+            askpass_script.write(script_content)
+            askpass_script.flush()
+            os.chmod(askpass_script.name, stat.S_IRWXU)
+
+            old_git_askpass = self.env.get("GIT_ASKPASS")
+            old_git_terminal_prompt = self.env.get("GIT_TERMINAL_PROMPT")
+            try:
+                self.env["GIT_ASKPASS"] = askpass_script.name

Review Comment:
   **Blocker (same point as the top-level review).** `self.env` is a plain dict 
on the hook — setting `GIT_ASKPASS` here only reaches the subprocess when the 
caller explicitly passes `env=self.hook.env`. That happens for the initial bare 
clone but not for `_fetch_bare_repo`, `_fetch_submodules`, or `refresh()`, so 
HTTPS-with-token refreshes will fail (and may hang, since `GIT_TERMINAL_PROMPT` 
is also missing from the real env).
   
   Mirror what `_passphrase_askpass_env` does just above — set both 
`os.environ` and `self.env`, and restore both in `finally`:
   
   ```python
   old_os_askpass = os.environ.get("GIT_ASKPASS")
   old_os_prompt = os.environ.get("GIT_TERMINAL_PROMPT")
   old_env_askpass = self.env.get("GIT_ASKPASS")
   old_env_prompt = self.env.get("GIT_TERMINAL_PROMPT")
   try:
       os.environ["GIT_ASKPASS"] = askpass_script.name
       os.environ["GIT_TERMINAL_PROMPT"] = "0"
       self.env["GIT_ASKPASS"] = askpass_script.name
       self.env["GIT_TERMINAL_PROMPT"] = "0"
       yield
   finally:
       for store, key, old in [
           (os.environ, "GIT_ASKPASS", old_os_askpass),
           (os.environ, "GIT_TERMINAL_PROMPT", old_os_prompt),
           (self.env, "GIT_ASKPASS", old_env_askpass),
           (self.env, "GIT_TERMINAL_PROMPT", old_env_prompt),
       ]:
           if old is None:
               store.pop(key, None)
           else:
               store[key] = old
   ```
   
   ---
   Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -145,20 +144,48 @@ def _build_ssh_command(self, key_path: str | None = None) 
-> str:
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
             return
-        if self.auth_token and self.repo_url.startswith("https://";):
-            encoded_user = urlquote(self.user_name, safe="")
-            encoded_token = urlquote(self.auth_token, safe="")
-            self.repo_url = self.repo_url.replace("https://";, 
f"https://{encoded_user}:{encoded_token}@";, 1)
-        elif self.auth_token and self.repo_url.startswith("http://";):
-            encoded_user = urlquote(self.user_name, safe="")
-            encoded_token = urlquote(self.auth_token, safe="")
-            self.repo_url = self.repo_url.replace("http://";, 
f"http://{encoded_user}:{encoded_token}@";, 1)
-        elif self.repo_url.startswith("http://";):
-            # if no auth token, use the repo url as is
-            pass
-        elif not self.repo_url.startswith("git@") and not 
self.repo_url.startswith("https://";):
+
+        if not self.repo_url.startswith("git@") and not 
self.repo_url.startswith("https://";):
             self.repo_url = os.path.expanduser(self.repo_url)
 
+    @contextlib.contextmanager
+    def _token_askpass_env(self):
+        if not self.auth_token:
+            yield
+            return
+
+        raw_username = self.user_name or "git"

Review Comment:
   Nit: `self.user_name` is set in `__init__` as `connection.login or "user"`, 
so it's never falsy by the time we get here — the `or "git"` branch is 
unreachable.
   
   Worth picking one default though, because `_process_git_auth_url` uses 
`"user"` and this method uses `"git"`. For GitHub fine-grained tokens the 
username is ignored, but GitLab deploy tokens and some Bitbucket setups care 
about which username is sent. I'd either standardise on one default in 
`__init__` (and drop the fallback here), or document why they differ.
   
   ---
   Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting



##########
providers/git/tests/unit/git/hooks/test_git.py:
##########
@@ -352,3 +353,40 @@ def test_passphrase_askpass_cleaned_up(self, 
create_connection_without_db):
             assert os.path.exists(askpass_path)
         # Both the askpass script and the temp key file should be cleaned up
         assert not os.path.exists(askpass_path)
+
+    def test_token_askpass_env_and_cleanup(self):

Review Comment:
   These two new tests assert the dict contents and the script body, but never 
exercise a real `git` call, so they wouldn't have caught (and don't guard 
against) the env-propagation bug noted on the source side.
   
   Could you add a test that monkey-patches `Repo.clone_from` (and ideally 
`origin.fetch`) to capture the `env` argument actually handed to gitpython, and 
asserts `GIT_ASKPASS` and `GIT_TERMINAL_PROMPT` are present for HTTPS+token? A 
bundle-level test in `tests/unit/git/bundles/test_git.py` is probably the right 
place since the bundle is what owns the propagation.
   
   Also — `ACCESS_TOKEN = "my_access_token"` has no shell-special chars, so 
`shlex.quote(ACCESS_TOKEN)` returns it unquoted and this assertion silently 
doesn't exercise the quoting path. Parametrising with a token like 
`"tok$with'quote"` would actually cover it.
   
   ---
   Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting



##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -145,20 +144,48 @@ def _build_ssh_command(self, key_path: str | None = None) 
-> str:
     def _process_git_auth_url(self):

Review Comment:
   After this PR the method only handles `~` expansion for local paths — 
there's no "auth url" logic left in it. The name is misleading now; consider 
renaming to `_normalize_repo_url` (or inlining the one remaining line into 
`__init__`).
   
   ---
   Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to