Nataneljpwd commented on code in PR #51893: URL: https://github.com/apache/airflow/pull/51893#discussion_r2155434837
########## providers/http/src/airflow/providers/http/hooks/http.py: ########## @@ -194,21 +215,33 @@ def _extract_auth(self, connection: Connection) -> Any | None: return None def _configure_session_from_extra( - self, session: Session, connection: Connection, extra_options: dict[str, Any] | None = None Review Comment: It might be better to pass an empty dict here as a default param instead or manually doing it later (in case we do not need it) However, it is not too important ########## providers/http/src/airflow/providers/http/hooks/http.py: ########## @@ -194,21 +215,33 @@ def _extract_auth(self, connection: Connection) -> Any | None: return None def _configure_session_from_extra( - self, session: Session, connection: Connection, extra_options: dict[str, Any] | None = None + self, session: Session, connection: Connection, extra_options: dict[str, Any] ) -> Session: - if extra_options is None: - extra_options = {} - headers = _process_extra_options_from_connection(connection, extra_options) - session.proxies = extra_options.pop("proxies", extra_options.pop("proxy", {})) - session.stream = extra_options.pop("stream", False) - session.verify = extra_options.pop("verify", extra_options.pop("verify_ssl", True)) - session.cert = extra_options.pop("cert", None) - session.max_redirects = extra_options.pop("max_redirects", DEFAULT_REDIRECT_LIMIT) - session.trust_env = extra_options.pop("trust_env", True) + """ + Configure the session using both the extra field from the Connection and passed in extra_options. + + :param session: (Session) + :param connection: HTTP Connection passed into Hook + :param extra_options: (dict) + :return: (Session) + """ + # This is going to update self.merged_extra, which will be used below + conn_extra_options, self.merged_extra = _process_extra_options_from_connection( + connection, extra_options + ) + + session.proxies = self.merged_extra.pop("proxies", self.merged_extra.pop("proxy", {})) Review Comment: Looks good, however, isn't it better to just use get with a default return value? And avoid deep copy? As it just adds an overhead and an extra dependency And you would need to keep track of 1 less variable which would reduce complexity. What was the reason for this implementation choice? Maybe I did not understand correctly ########## providers/http/src/airflow/providers/http/hooks/http.py: ########## @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +import copy Review Comment: Why use copy? ########## providers/http/src/airflow/providers/http/hooks/http.py: ########## @@ -159,8 +174,14 @@ def get_conn( connection = self.get_connection(self.http_conn_id) self._set_base_url(connection) session = self._configure_session_from_auth(session, connection) + + # Since get_conn can be called outside of run, we'll check this again + extra_options = extra_options or {} + if connection.extra or extra_options: Review Comment: I think that this check can be removed as if we call the function either way, it will not modify the session unless it has the extra configuration ########## providers/http/src/airflow/providers/http/hooks/http.py: ########## @@ -48,37 +49,50 @@ def _url_from_endpoint(base_url: str | None, endpoint: str | None) -> str: return (base_url or "") + (endpoint or "") -def _process_extra_options_from_connection(conn: Connection, extra_options: dict[str, Any]) -> dict: - extra = conn.extra_dejson - stream = extra.pop("stream", None) - cert = extra.pop("cert", None) - proxies = extra.pop("proxies", extra.pop("proxy", None)) - timeout = extra.pop("timeout", None) - verify_ssl = extra.pop("verify", extra.pop("verify_ssl", None)) - allow_redirects = extra.pop("allow_redirects", None) - max_redirects = extra.pop("max_redirects", None) - trust_env = extra.pop("trust_env", None) - check_response = extra.pop("check_response", None) - - if stream is not None and "stream" not in extra_options: - extra_options["stream"] = stream - if cert is not None and "cert" not in extra_options: - extra_options["cert"] = cert - if proxies is not None and "proxy" not in extra_options: - extra_options["proxy"] = proxies - if timeout is not None and "timeout" not in extra_options: - extra_options["timeout"] = timeout - if verify_ssl is not None and "verify_ssl" not in extra_options: - extra_options["verify_ssl"] = verify_ssl - if allow_redirects is not None and "allow_redirects" not in extra_options: - extra_options["allow_redirects"] = allow_redirects - if max_redirects is not None and "max_redirects" not in extra_options: - extra_options["max_redirects"] = max_redirects - if trust_env is not None and "trust_env" not in extra_options: - extra_options["trust_env"] = trust_env - if check_response is not None and "check_response" not in extra_options: - extra_options["check_response"] = check_response - return extra +def _process_extra_options_from_connection( + conn: Connection, extra_options: dict[str, Any] +) -> tuple[dict[str, Any], dict[str, Any]]: + """ + Return the updated extra options from the connection, as well as those passed. + + :param conn: The HTTP Connection object passed to the Hook + :param extra_options: Use-defined extra options + :return: (tuple) + """ + # Copy, to prevent changing conn.extra_dejson and extra_options + conn_extra_options: dict = copy.deepcopy(conn.extra_dejson) Review Comment: Why not avoid it with `dict.get(key, default_value)`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org