Nataneljpwd commented on code in PR #51893:
URL: https://github.com/apache/airflow/pull/51893#discussion_r2155434837
##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -194,21 +215,33 @@ def _extract_auth(self, connection: Connection) -> Any |
None:
return None
def _configure_session_from_extra(
- self, session: Session, connection: Connection, extra_options:
dict[str, Any] | None = None
Review Comment:
It might be better to pass an empty dict here as a default param instead of
manually doing it later (in case we do not need it). However, it is not too
important.
##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -194,21 +215,33 @@ def _extract_auth(self, connection: Connection) -> Any |
None:
return None
def _configure_session_from_extra(
- self, session: Session, connection: Connection, extra_options:
dict[str, Any] | None = None
+ self, session: Session, connection: Connection, extra_options:
dict[str, Any]
) -> Session:
- if extra_options is None:
- extra_options = {}
- headers = _process_extra_options_from_connection(connection,
extra_options)
- session.proxies = extra_options.pop("proxies",
extra_options.pop("proxy", {}))
- session.stream = extra_options.pop("stream", False)
- session.verify = extra_options.pop("verify",
extra_options.pop("verify_ssl", True))
- session.cert = extra_options.pop("cert", None)
- session.max_redirects = extra_options.pop("max_redirects",
DEFAULT_REDIRECT_LIMIT)
- session.trust_env = extra_options.pop("trust_env", True)
+ """
+ Configure the session using both the extra field from the Connection
and passed in extra_options.
+
+ :param session: (Session)
+ :param connection: HTTP Connection passed into Hook
+ :param extra_options: (dict)
+ :return: (Session)
+ """
+ # This is going to update self.merged_extra, which will be used below
+ conn_extra_options, self.merged_extra =
_process_extra_options_from_connection(
+ connection, extra_options
+ )
+
+ session.proxies = self.merged_extra.pop("proxies",
self.merged_extra.pop("proxy", {}))
Review Comment:
Looks good. However, isn't it better to just use `get` with a default return
value and avoid the deep copy?
The deep copy just adds overhead and an extra dependency, and without it
you would need to keep track of one less variable, which would reduce
complexity.
What was the reason for this implementation choice? Maybe I did not
understand correctly.
##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -17,6 +17,7 @@
# under the License.
from __future__ import annotations
+import copy
Review Comment:
Why use copy?
##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -159,8 +174,14 @@ def get_conn(
connection = self.get_connection(self.http_conn_id)
self._set_base_url(connection)
session = self._configure_session_from_auth(session, connection)
+
+ # Since get_conn can be called outside of run, we'll check this again
+ extra_options = extra_options or {}
+
if connection.extra or extra_options:
Review Comment:
I think that this check can be removed: if we call the function either
way, it will not modify the session unless extra configuration is present.
##########
providers/http/src/airflow/providers/http/hooks/http.py:
##########
@@ -48,37 +49,50 @@ def _url_from_endpoint(base_url: str | None, endpoint: str
| None) -> str:
return (base_url or "") + (endpoint or "")
-def _process_extra_options_from_connection(conn: Connection, extra_options:
dict[str, Any]) -> dict:
- extra = conn.extra_dejson
- stream = extra.pop("stream", None)
- cert = extra.pop("cert", None)
- proxies = extra.pop("proxies", extra.pop("proxy", None))
- timeout = extra.pop("timeout", None)
- verify_ssl = extra.pop("verify", extra.pop("verify_ssl", None))
- allow_redirects = extra.pop("allow_redirects", None)
- max_redirects = extra.pop("max_redirects", None)
- trust_env = extra.pop("trust_env", None)
- check_response = extra.pop("check_response", None)
-
- if stream is not None and "stream" not in extra_options:
- extra_options["stream"] = stream
- if cert is not None and "cert" not in extra_options:
- extra_options["cert"] = cert
- if proxies is not None and "proxy" not in extra_options:
- extra_options["proxy"] = proxies
- if timeout is not None and "timeout" not in extra_options:
- extra_options["timeout"] = timeout
- if verify_ssl is not None and "verify_ssl" not in extra_options:
- extra_options["verify_ssl"] = verify_ssl
- if allow_redirects is not None and "allow_redirects" not in extra_options:
- extra_options["allow_redirects"] = allow_redirects
- if max_redirects is not None and "max_redirects" not in extra_options:
- extra_options["max_redirects"] = max_redirects
- if trust_env is not None and "trust_env" not in extra_options:
- extra_options["trust_env"] = trust_env
- if check_response is not None and "check_response" not in extra_options:
- extra_options["check_response"] = check_response
- return extra
+def _process_extra_options_from_connection(
+ conn: Connection, extra_options: dict[str, Any]
+) -> tuple[dict[str, Any], dict[str, Any]]:
+ """
+ Return the updated extra options from the connection, as well as those
passed.
+
+ :param conn: The HTTP Connection object passed to the Hook
+ :param extra_options: Use-defined extra options
+ :return: (tuple)
+ """
+ # Copy, to prevent changing conn.extra_dejson and extra_options
+ conn_extra_options: dict = copy.deepcopy(conn.extra_dejson)
Review Comment:
Why not avoid it with `dict.get(key, default_value)`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]