Nataneljpwd commented on code in PR #51893:
URL: https://github.com/apache/airflow/pull/51893#discussion_r2155434837


##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -194,21 +215,33 @@ def _extract_auth(self, connection: Connection) -> Any | 
None:
         return None
 
     def _configure_session_from_extra(
-        self, session: Session, connection: Connection, extra_options: 
dict[str, Any] | None = None

Review Comment:
   It might be better to pass an empty dict here as a default param instead or 
manually doing it later (in case we do not need it) However, it is not too 
important 



##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -194,21 +215,33 @@ def _extract_auth(self, connection: Connection) -> Any | 
None:
         return None
 
     def _configure_session_from_extra(
-        self, session: Session, connection: Connection, extra_options: 
dict[str, Any] | None = None
+        self, session: Session, connection: Connection, extra_options: 
dict[str, Any]
     ) -> Session:
-        if extra_options is None:
-            extra_options = {}
-        headers = _process_extra_options_from_connection(connection, 
extra_options)
-        session.proxies = extra_options.pop("proxies", 
extra_options.pop("proxy", {}))
-        session.stream = extra_options.pop("stream", False)
-        session.verify = extra_options.pop("verify", 
extra_options.pop("verify_ssl", True))
-        session.cert = extra_options.pop("cert", None)
-        session.max_redirects = extra_options.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-        session.trust_env = extra_options.pop("trust_env", True)
+        """
+        Configure the session using both the extra field from the Connection 
and passed in extra_options.
+
+        :param session: (Session)
+        :param connection: HTTP Connection passed into Hook
+        :param extra_options: (dict)
+        :return: (Session)
+        """
+        # This is going to update self.merged_extra, which will be used below
+        conn_extra_options, self.merged_extra = 
_process_extra_options_from_connection(
+            connection, extra_options
+        )
+
+        session.proxies = self.merged_extra.pop("proxies", 
self.merged_extra.pop("proxy", {}))

Review Comment:
   Looks good, however, isn't it better to just use get with a default return 
value? And avoid deep copy?
   As it just adds an overhead and an extra dependency
   And you would need to keep track of 1 less variable which would reduce 
complexity.
   What was the reason for this implementation choice? Maybe I did not 
understand correctly 



##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -17,6 +17,7 @@
 # under the License.
 from __future__ import annotations
 
+import copy

Review Comment:
   Why use copy?



##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -159,8 +174,14 @@ def get_conn(
         connection = self.get_connection(self.http_conn_id)
         self._set_base_url(connection)
         session = self._configure_session_from_auth(session, connection)
+
+        # Since get_conn can be called outside of run, we'll check this again
+        extra_options = extra_options or {}
+
         if connection.extra or extra_options:

Review Comment:
   I think that this check can be removed as if we call the function either 
way, it will not modify the session unless it has the extra configuration 



##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -48,37 +49,50 @@ def _url_from_endpoint(base_url: str | None, endpoint: str 
| None) -> str:
     return (base_url or "") + (endpoint or "")
 
 
-def _process_extra_options_from_connection(conn: Connection, extra_options: 
dict[str, Any]) -> dict:
-    extra = conn.extra_dejson
-    stream = extra.pop("stream", None)
-    cert = extra.pop("cert", None)
-    proxies = extra.pop("proxies", extra.pop("proxy", None))
-    timeout = extra.pop("timeout", None)
-    verify_ssl = extra.pop("verify", extra.pop("verify_ssl", None))
-    allow_redirects = extra.pop("allow_redirects", None)
-    max_redirects = extra.pop("max_redirects", None)
-    trust_env = extra.pop("trust_env", None)
-    check_response = extra.pop("check_response", None)
-
-    if stream is not None and "stream" not in extra_options:
-        extra_options["stream"] = stream
-    if cert is not None and "cert" not in extra_options:
-        extra_options["cert"] = cert
-    if proxies is not None and "proxy" not in extra_options:
-        extra_options["proxy"] = proxies
-    if timeout is not None and "timeout" not in extra_options:
-        extra_options["timeout"] = timeout
-    if verify_ssl is not None and "verify_ssl" not in extra_options:
-        extra_options["verify_ssl"] = verify_ssl
-    if allow_redirects is not None and "allow_redirects" not in extra_options:
-        extra_options["allow_redirects"] = allow_redirects
-    if max_redirects is not None and "max_redirects" not in extra_options:
-        extra_options["max_redirects"] = max_redirects
-    if trust_env is not None and "trust_env" not in extra_options:
-        extra_options["trust_env"] = trust_env
-    if check_response is not None and "check_response" not in extra_options:
-        extra_options["check_response"] = check_response
-    return extra
+def _process_extra_options_from_connection(
+    conn: Connection, extra_options: dict[str, Any]
+) -> tuple[dict[str, Any], dict[str, Any]]:
+    """
+    Return the updated extra options from the connection, as well as those 
passed.
+
+    :param conn: The HTTP Connection object passed to the Hook
+    :param extra_options: Use-defined extra options
+    :return: (tuple)
+    """
+    # Copy, to prevent changing conn.extra_dejson and extra_options
+    conn_extra_options: dict = copy.deepcopy(conn.extra_dejson)

Review Comment:
   Why not avoid it with `dict.get(key, default_value)`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to