Lee-W commented on code in PR #44302:
URL: https://github.com/apache/airflow/pull/44302#discussion_r1857883338


##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"
+        parsed = urlparse(self.base_url)
+        if not parsed.scheme:
+            raise ValueError(f"Invalid base URL: Missing scheme in 
{self.base_url}")
+
+    def _set_auth(self, session: requests.Session, connection) -> None:

Review Comment:
   ```suggestion
       def _set_auth(self, session: requests.Session, connection: Connection) 
-> None:
   ```



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -72,6 +75,7 @@ def __init__(
         method: str = "POST",
         http_conn_id: str = default_conn_name,
         auth_type: Any = None,
+        adapter: HTTPAdapter | None = None,

Review Comment:
   We should move this one to the last parameter or make it a keyword-only 
argument. Otherwise, it would be a breaking change. I would suggest making it a 
keyword-only argument.



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"
+        parsed = urlparse(self.base_url)
+        if not parsed.scheme:
+            raise ValueError(f"Invalid base URL: Missing scheme in 
{self.base_url}")
+
+    def _set_auth(self, session: requests.Session, connection) -> None:
+        if connection.login:
+            session.auth = self.auth_type(connection.login, 
connection.password)
+        elif self._auth_type:
+            session.auth = self.auth_type()
+
+    def _set_extra(self, session: requests.Session, connection) -> None:
+        if connection.extra:

Review Comment:
   We should probably do this `if` check in `get_conn` instead



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"
+        parsed = urlparse(self.base_url)
+        if not parsed.scheme:
+            raise ValueError(f"Invalid base URL: Missing scheme in 
{self.base_url}")
+
+    def _set_auth(self, session: requests.Session, connection) -> None:
+        if connection.login:
+            session.auth = self.auth_type(connection.login, 
connection.password)
+        elif self._auth_type:
+            session.auth = self.auth_type()
+
+    def _set_extra(self, session: requests.Session, connection) -> None:

Review Comment:
   ```suggestion
       def _set_extra(self, session: requests.Session, connection: Connection) 
-> None:
   ```



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:

Review Comment:
   ```suggestion
       def _set_base_url(self, connection: Connection) -> None:
   ```



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"

Review Comment:
   ```suggestion
                   self.base_url = f"{self.base_url}:{connection.port}"
   ```



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"
+        parsed = urlparse(self.base_url)
+        if not parsed.scheme:
+            raise ValueError(f"Invalid base URL: Missing scheme in 
{self.base_url}")
+
+    def _set_auth(self, session: requests.Session, connection) -> None:
+        if connection.login:
+            session.auth = self.auth_type(connection.login, 
connection.password)
+        elif self._auth_type:
+            session.auth = self.auth_type()

Review Comment:
   ```suggestion
       def _extract_auth(self, connection: Connection) -> Any | None:
           if connection.login:
               return self.auth_type(connection.login, connection.password)
           elif self._auth_type:
               return self.auth_type()
   ```
   
   or this might also work
   
   if 
   



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"
+        parsed = urlparse(self.base_url)
+        if not parsed.scheme:
+            raise ValueError(f"Invalid base URL: Missing scheme in 
{self.base_url}")
+
+    def _set_auth(self, session: requests.Session, connection) -> None:
+        if connection.login:
+            session.auth = self.auth_type(connection.login, 
connection.password)
+        elif self._auth_type:
+            session.auth = self.auth_type()

Review Comment:
   Setting `session` this way would work, but it might confuse some 
contributors as we're mutating the argument in place. I would suggest returning 
the `session` object and reassigning it in `get_conn` instead.
   
   something like `session = self._set_auth(...)`



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"

Review Comment:
   ```suggestion
           if connection.port:
               self.base_url += f":{connection.port}"
   ```
   
   I think it should be dedented 🤔  Could you please check whether I'm 
mistaken? If I'm not, it would be better to add a unit test to cover it.



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"
+        parsed = urlparse(self.base_url)
+        if not parsed.scheme:
+            raise ValueError(f"Invalid base URL: Missing scheme in 
{self.base_url}")
+
+    def _set_auth(self, session: requests.Session, connection) -> None:
+        if connection.login:
+            session.auth = self.auth_type(connection.login, 
connection.password)
+        elif self._auth_type:
+            session.auth = self.auth_type()
+
+    def _set_extra(self, session: requests.Session, connection) -> None:

Review Comment:
   `_set_extra` is a confusing name in this case, as we're not actually setting 
`extra` but extracting attributes from it.



##########
providers/src/airflow/providers/http/hooks/http.py:
##########
@@ -102,49 +120,68 @@ def get_conn(self, headers: dict[Any, Any] | None = None) 
-> requests.Session:
         """
         Create a Requests HTTP session.
 
-        :param headers: additional headers to be passed through as a dictionary
+        :param headers: Additional headers to be passed through as a 
dictionary.
+        :return: A configured requests.Session object.
         """
         session = requests.Session()
+        connection = self.get_connection(self.http_conn_id)
 
-        if self.http_conn_id:
-            conn = self.get_connection(self.http_conn_id)
+        self._set_base_url(connection)
+        self._set_auth(session, connection)
+        self._set_extra(session, connection)
+        self._mount_adapters(session)
 
-            if conn.host and "://" in conn.host:
-                self.base_url = conn.host
-            else:
-                # schema defaults to HTTP
-                schema = conn.schema if conn.schema else "http"
-                host = conn.host if conn.host else ""
-                self.base_url = f"{schema}://{host}"
-
-            if conn.port:
-                self.base_url += f":{conn.port}"
-            if conn.login:
-                session.auth = self.auth_type(conn.login, conn.password)
-            elif self._auth_type:
-                session.auth = self.auth_type()
-            if conn.extra:
-                extra = conn.extra_dejson
-                extra.pop(
-                    "timeout", None
-                )  # ignore this as timeout is only accepted in request method 
of Session
-                extra.pop("allow_redirects", None)  # ignore this as only 
max_redirects is accepted in Session
-                session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
-                session.stream = extra.pop("stream", False)
-                session.verify = extra.pop("verify", extra.pop("verify_ssl", 
True))
-                session.cert = extra.pop("cert", None)
-                session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
-                session.trust_env = extra.pop("trust_env", True)
-
-                try:
-                    session.headers.update(extra)
-                except TypeError:
-                    self.log.warning("Connection to %s has invalid extra 
field.", conn.host)
         if headers:
             session.headers.update(headers)
 
         return session
 
+    def _set_base_url(self, connection) -> None:
+        host = connection.host or ""
+        schema = connection.schema or "http"
+        if "://" in host:
+            self.base_url = host
+        else:
+            self.base_url = f"{schema}://{host}" if host else f"{schema}://"
+            if connection.port:
+                self.base_url += f":{connection.port}"
+        parsed = urlparse(self.base_url)
+        if not parsed.scheme:
+            raise ValueError(f"Invalid base URL: Missing scheme in 
{self.base_url}")
+
+    def _set_auth(self, session: requests.Session, connection) -> None:
+        if connection.login:
+            session.auth = self.auth_type(connection.login, 
connection.password)
+        elif self._auth_type:
+            session.auth = self.auth_type()
+
+    def _set_extra(self, session: requests.Session, connection) -> None:
+        if connection.extra:
+            extra = connection.extra_dejson
+            extra.pop("timeout", None)
+            extra.pop("allow_redirects", None)
+            session.proxies = extra.pop("proxies", extra.pop("proxy", {}))
+            session.stream = extra.pop("stream", False)
+            session.verify = extra.pop("verify", extra.pop("verify_ssl", True))
+            session.cert = extra.pop("cert", None)
+            session.max_redirects = extra.pop("max_redirects", 
DEFAULT_REDIRECT_LIMIT)
+            session.trust_env = extra.pop("trust_env", True)
+
+            try:
+                session.headers.update(extra)
+            except TypeError:
+                self.log.warning("Connection to %s has invalid extra field.", 
connection.host)
+
+    def _mount_adapters(self, session: requests.Session) -> None:
+        scheme = urlparse(self.base_url).scheme
+        if not scheme:
+            raise ValueError("Cannot mount adapters: base_url must include a 
valid scheme (http or https).")

Review Comment:
   ```suggestion
               raise ValueError(f'Cannot mount adapters: "{self.base_url}" does 
not include a valid scheme (http or https).')
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to