sha174n commented on code in PR #39301:
URL: https://github.com/apache/superset/pull/39301#discussion_r3405762800


##########
superset/utils/network.py:
##########
@@ -14,14 +14,61 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import ipaddress
 import platform
 import socket
 import subprocess
 
+# Networks that must never be reached via user-supplied hostnames.
+# Includes loopback, RFC-1918 private ranges, link-local (covers cloud
+# metadata endpoints such as 169.254.169.254), shared address space
+# (RFC 6598, 100.64.0.0/10), and IPv6 equivalents.
+_SSRF_UNSAFE_NETWORKS = (
+    ipaddress.ip_network("0.0.0.0/8"),
+    ipaddress.ip_network("10.0.0.0/8"),
+    ipaddress.ip_network("100.64.0.0/10"),
+    ipaddress.ip_network("127.0.0.0/8"),
+    ipaddress.ip_network("169.254.0.0/16"),
+    ipaddress.ip_network("172.16.0.0/12"),
+    ipaddress.ip_network("192.168.0.0/16"),
+    ipaddress.ip_network("::1/128"),
+    ipaddress.ip_network("fc00::/7"),
+    ipaddress.ip_network("fe80::/10"),
+)
+
 PORT_TIMEOUT = 5
 PING_TIMEOUT = 5
 
 
+def is_safe_host(host: str) -> bool:
+    """
+    Return True if ``host`` resolves exclusively to public, globally-routable
+    IP addresses.
+
+    Returns False if any resolved address falls within a private, loopback,
+    link-local, or otherwise non-routable range.  An unresolvable host also
+    returns False.
+    """
+    try:
+        results = socket.getaddrinfo(host, None)
+    except socket.gaierror:
+        return False
+    if not results:
+        return False
+    for _, _, _, _, sockaddr in results:
+        try:
+            ip = ipaddress.ip_address(sockaddr[0])
+        except ValueError:
+            return False
+        # Unwrap IPv4-mapped IPv6 addresses (e.g. ::ffff:127.0.0.1) so they
+        # are checked against the IPv4 unsafe networks rather than bypassing.
+        if isinstance(ip, ipaddress.IPv6Address) and ip.ipv4_mapped:
+            ip = ip.ipv4_mapped
+        if any(ip in net for net in _SSRF_UNSAFE_NETWORKS):

Review Comment:
   Addressed: `is_global` is the primary gate, backed by an explicit 
unsafe-network tuple (loopback, RFC-1918, link-local, 100.64.0.0/10, multicast) 
plus IPv4-mapped IPv6 unwrapping.



##########
superset/reports/notifications/webhook.py:
##########
@@ -93,6 +94,16 @@ def _get_files(self) -> list[tuple[str, tuple[str, bytes, 
str]]]:
                 )
         return files
 
+    def _validate_webhook_url(self, url: str) -> None:
+        parsed = urlparse(url)
+        if current_app.config["ALERT_REPORTS_WEBHOOK_HTTPS_ONLY"]:
+            if parsed.scheme.lower() != "https":
+                raise NotificationParamException(
+                    "Webhook failed: HTTPS is required by config for webhook 
URLs."
+                )
+        if parsed.hostname and not is_safe_host(parsed.hostname):

Review Comment:
   Addressed: `_validate_webhook_url` rejects non-HTTP(S) schemes and missing 
hostnames before the host check.



##########
superset/db_engine_specs/impala.py:
##########
@@ -209,6 +210,8 @@ def cancel_query(cls, cursor: Any, query: Query, 
cancel_query_id: str) -> bool:
         """
         try:
             impala_host = query.database.url_object.host
+            if not impala_host or not is_safe_host(impala_host):
+                return False
             url = 
f"http://{impala_host}:25000/cancel_query?query_id={cancel_query_id}";
             response = requests.post(url, timeout=3)

Review Comment:
   Addressed: gated behind `IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS` so 
internal Impala deployments can opt out (test hardened in 2e7e54d5ff).



##########
superset/db_engine_specs/couchbase.py:
##########
@@ -228,7 +228,7 @@ def validate_parameters(
         if not host:
             return errors
         # host can be a connection string in case of couchbase cloud. So 
Connection Check is not required in that case.  # noqa: E501
-        if not is_hostname_valid(host):
+        if not is_safe_host(host):

Review Comment:
   Reverted: couchbase keeps `is_hostname_valid`, so connection strings and 
internal hosts are unaffected.



##########
superset/db_engine_specs/databricks.py:
##########
@@ -393,7 +393,7 @@ def validate_parameters(  # type: ignore
         if not host:
             return errors
 
-        if not is_hostname_valid(host):  # type: ignore
+        if not is_safe_host(host):  # type: ignore
             errors.append(
                 SupersetError(
                     message="The hostname provided can't be resolved.",

Review Comment:
   Reverted: databricks keeps `is_hostname_valid`; the host-check change was 
dropped from this PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to