uranusjr commented on a change in pull request #16954:
URL: https://github.com/apache/airflow/pull/16954#discussion_r669167086



##########
File path: airflow/utils/python_virtualenv.py
##########
@@ -35,12 +37,35 @@ def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, 
system_site_packages
     return cmd
 
 
-def _generate_pip_install_cmd(tmp_dir: str, requirements: List[str]) -> 
Optional[List[str]]:
+def _generate_pip_install_cmd(tmp_dir: str,
+                              requirements: List[str],
+                              connection_id: Optional[str] = None
+                              ) -> Optional[List[str]]:
     if not requirements:
         return None
-    # direct path alleviates need to activate
-    cmd = [f'{tmp_dir}/bin/pip', 'install']
-    return cmd + requirements
+
+    if connection_id:
+        con: Connection = BaseHook.get_connection(connection_id)
+        user = con.login
+        schema = con.schema or 'https'
+        password = con.get_password()
+        port = con.port or 8080
+        host = con.host
+        host_suffix = con.extra_dejson.get('host_suffix', 
'repository/python/simple')

Review comment:
       The default seems arbitrary (specific to Artifactory?). PEP 503 does not 
require the index to be hosted under a suffix, so IMO this should default to an 
empty string. Also, `host_suffix` feels like it should be appended after 
`host`, not after the port part. How about calling this `path`? (This is the 
terminology used by `urllib.parse`.)

##########
File path: airflow/utils/python_virtualenv.py
##########
@@ -35,12 +37,35 @@ def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, 
system_site_packages
     return cmd
 
 
-def _generate_pip_install_cmd(tmp_dir: str, requirements: List[str]) -> 
Optional[List[str]]:
+def _generate_pip_install_cmd(tmp_dir: str,
+                              requirements: List[str],
+                              connection_id: Optional[str] = None
+                              ) -> Optional[List[str]]:
     if not requirements:
         return None
-    # direct path alleviates need to activate
-    cmd = [f'{tmp_dir}/bin/pip', 'install']
-    return cmd + requirements
+
+    if connection_id:
+        con: Connection = BaseHook.get_connection(connection_id)
+        user = con.login
+        schema = con.schema or 'https'
+        password = con.get_password()
+        port = con.port or 8080
+        host = con.host
+        host_suffix = con.extra_dejson.get('host_suffix', 
'repository/python/simple')
+        if user:
+            index_url = 
os.path.join(f"{schema}://{user}:{password}@{host}:{port}", host_suffix)
+        else:
+            index_url = os.path.join(f"{schema}://{host}:{port}", host_suffix)

Review comment:
       Use `urllib.parse.urlunsplit` instead.

##########
File path: airflow/utils/python_virtualenv.py
##########
@@ -35,12 +37,35 @@ def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, 
system_site_packages
     return cmd
 
 
-def _generate_pip_install_cmd(tmp_dir: str, requirements: List[str]) -> 
Optional[List[str]]:
+def _generate_pip_install_cmd(tmp_dir: str,
+                              requirements: List[str],
+                              connection_id: Optional[str] = None
+                              ) -> Optional[List[str]]:
     if not requirements:
         return None
-    # direct path alleviates need to activate
-    cmd = [f'{tmp_dir}/bin/pip', 'install']
-    return cmd + requirements
+
+    if connection_id:
+        con: Connection = BaseHook.get_connection(connection_id)
+        user = con.login
+        schema = con.schema or 'https'
+        password = con.get_password()
+        port = con.port or 8080

Review comment:
       The default 8080 is also very arbitrary. The index is just an HTTP 
server, so this should default to no explicit port.

##########
File path: airflow/utils/python_virtualenv.py
##########
@@ -35,12 +37,35 @@ def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, 
system_site_packages
     return cmd
 
 
-def _generate_pip_install_cmd(tmp_dir: str, requirements: List[str]) -> 
Optional[List[str]]:
+def _generate_pip_install_cmd(tmp_dir: str,
+                              requirements: List[str],
+                              connection_id: Optional[str] = None
+                              ) -> Optional[List[str]]:
     if not requirements:
         return None
-    # direct path alleviates need to activate
-    cmd = [f'{tmp_dir}/bin/pip', 'install']
-    return cmd + requirements
+
+    if connection_id:
+        con: Connection = BaseHook.get_connection(connection_id)
+        user = con.login
+        schema = con.schema or 'https'
+        password = con.get_password()
+        port = con.port or 8080
+        host = con.host
+        host_suffix = con.extra_dejson.get('host_suffix', 
'repository/python/simple')
+        if user:
+            index_url = 
os.path.join(f"{schema}://{user}:{password}@{host}:{port}", host_suffix)
+        else:
+            index_url = os.path.join(f"{schema}://{host}:{port}", host_suffix)
+        private_cmd = [f'{tmp_dir}/bin/pip',
+                       'install',
+                       f'--index-url', index_url,
+                       f'--extra-index-url', 'https://pypi.org/simple'

Review comment:
       This is highly discouraged for very complicated reasons (see the pip 
issue linked in my previous comment). I recommend setting *only* `index_url` to 
the custom connection. If you really really must use `--extra-index-url` (you 
still shouldn’t), set the custom repository as the extra index and keep PyPI as 
the default index.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to