github-actions[bot] commented on code in PR #63817: URL: https://github.com/apache/doris/pull/63817#discussion_r3472314762
########## fe/fe-core/src/main/java/org/apache/doris/datasource/connectivity/CatalogSsrfChecker.java: ########## @@ -0,0 +1,401 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.connectivity; + +import org.apache.doris.cloud.security.SecurityChecker; +import org.apache.doris.common.DdlException; +import org.apache.doris.datasource.property.metastore.MetastoreProperties; +import org.apache.doris.datasource.property.storage.HdfsProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; +import org.apache.doris.foundation.property.ConnectorProperty; + +import com.google.common.collect.Sets; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.net.Inet6Address; +import java.net.InetAddress; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Validates user-supplied catalog URIs against SSRF attacks before a catalog is created. + * + * <p>Unlike {@link CatalogConnectivityTestCoordinator}, this checker opens no endpoint + * connections; it resolves each target host and rejects internal / private / loopback + * addresses before catalog properties are persisted. Because no connectivity probe is + * required, the check runs unconditionally on every CREATE CATALOG, regardless of the + * {@code test_connection} property. + * + * <p>Discovery is driven by the {@link ConnectorProperty#checkSsrf()} flag. To extend + * coverage to a new property class, simply set {@code checkSsrf = true} on its endpoint / + * URI field; no change to this class is required. + */ +public class CatalogSsrfChecker { + private static final Logger LOG = LogManager.getLogger(CatalogSsrfChecker.class); + + /** Reflection traversal only descends into property classes under this package prefix. */ + private static final String PROPERTY_PACKAGE_PREFIX = "org.apache.doris.datasource.property."; + + /** + * HDFS HA exposes one rpc-address per namenode under dynamic keys + * (e.g. {@code dfs.namenode.rpc-address.<nameservice>.<nn>}); these are stored in + * {@link HdfsProperties#getBackendConfigProperties()} rather than as declared fields, + * so {@link ConnectorProperty#checkSsrf()} cannot reach them. They are collected + * separately. + */ + private static final String HDFS_NAMENODE_RPC_ADDRESS_PREFIX = "dfs.namenode.rpc-address."; + + private CatalogSsrfChecker() {} + + interface UriValidator { + void checkUri(String uri) throws Exception; + + void checkJdbcUrl(String jdbcUrl) throws Exception; + } + + private static class DefaultUriValidator implements UriValidator { + @Override + public void checkUri(String uri) throws Exception { + checkHost(uri, extractHostFromUri(uri)); + try { + SecurityChecker.getInstance().startSSRFChecking(uri); + } finally { + SecurityChecker.getInstance().stopSSRFChecking(); + } + } + + @Override + public void checkJdbcUrl(String jdbcUrl) throws Exception { + String host = extractHostFromJdbcUrl(jdbcUrl); Review Comment: This fallback makes the explicit JDBC host check miss supported JDBC URL forms. `extractHostFromJdbcUrl()` returns `null` for inputs such as `jdbc:oracle:thin:@127.0.0.1:1521:orcl`, `jdbc:oracle:thin:@//127.0.0.1:1521/orcl`, and `jdbc:sqlserver://127.0.0.1:1433;databaseName=db` because the generic `normalizeToHttpUrl()`/`URI.getHost()` parsing cannot understand those syntaxes. When that happens this method skips `checkHost()` and only calls `SecurityChecker.getSafeJdbcUrl()`; with the default `DummySecurityChecker` that is a no-op, so loopback/private JDBC endpoints can still be persisted. Please fail closed when the JDBC host cannot be parsed, or use a JDBC-aware parser that validates every supported JDBC URL form before falling back to the security checker. ########## fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java: ########## @@ -145,6 +149,38 @@ public void testExternalCatalogAutoAnalyze() throws Exception { Assertions.assertTrue(catalog.enableAutoAnalyze()); } + @Test + public void testAlterCatalogPropsRunsSsrfCheckAndRollsBack() throws Exception { + String catalogName = "alter_ssrf_hms"; + SecurityChecker mockChecker = Mockito.mock(SecurityChecker.class); + try (MockedStatic<SecurityChecker> mockedStatic = Mockito.mockStatic(SecurityChecker.class)) { + mockedStatic.when(SecurityChecker::getInstance).thenReturn(mockChecker); + + NereidsParser nereidsParser = new NereidsParser(); + String createStmt = "create catalog " + catalogName + " properties(\n" + + " \"type\" = \"hms\",\n" + + " \"hive.metastore.uris\" = \"thrift://safe-host:9083\"\n" + + ");"; + LogicalPlan logicalPlan = nereidsParser.parseSingle(createStmt); + Assertions.assertTrue(logicalPlan instanceof CreateCatalogCommand); + ((CreateCatalogCommand) logicalPlan).run(rootCtx, null); + + Mockito.doThrow(new RuntimeException("URL points to private IP")) + .when(mockChecker).startSSRFChecking("http://127.0.0.1:9083"); + Map<String, String> newProps = Maps.newHashMap(); + newProps.put("hive.metastore.uris", "thrift://127.0.0.1:9083"); + + DdlException ex = Assertions.assertThrows(DdlException.class, + () -> mgr.alterCatalogProps(catalogName, newProps)); + + Assertions.assertTrue(ex.getMessage().contains("SSRF check failed"), + "message should explain SSRF failure, was: " + ex.getMessage()); + Assertions.assertEquals("thrift://safe-host:9083", Review Comment: This verification is stale after the checker was changed to reject loopback hosts before entering the `SecurityChecker` hook. For `thrift://127.0.0.1:9083`, `DefaultUriValidator.checkUri()` calls `checkHost()` first, and `checkHost()` throws on `isLoopbackAddress()` before `startSSRFChecking()` is reached. The alter call still rejects and rolls back, but `Mockito.verify(mockChecker).startSSRFChecking("http://127.0.0.1:9083")` should fail. Please either drop this hook verification for the loopback case, or use a non-loopback hostname if this test is meant to exercise the hook path. ########## fe/fe-core/src/test/java/org/apache/doris/datasource/connectivity/CatalogSsrfCheckerTest.java: ########## @@ -0,0 +1,384 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.connectivity; + +import org.apache.doris.common.DdlException; +import org.apache.doris.common.UserException; +import org.apache.doris.datasource.property.metastore.MetastoreProperties; +import org.apache.doris.datasource.property.storage.HdfsProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; +import org.apache.doris.foundation.property.ConnectorProperty; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Verifies that {@link CatalogSsrfChecker} discovers the right URIs from various property + * shapes, driven by the {@code @ConnectorProperty(checkSsrf=true)} annotation plus the + * HDFS dynamic-key special case, and hands each one to the validator in the expected form. + */ +public class CatalogSsrfCheckerTest { + + @Test + public void testNullInputsDoNothing() throws Exception { + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", null, null, validator); + + Assertions.assertTrue(validator.checkedUris.isEmpty()); + Assertions.assertTrue(validator.checkedJdbcUrls.isEmpty()); + } + + @Test + public void testEmptyStorageMapDoesNothing() throws Exception { + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", null, new HashMap<>(), validator); + + Assertions.assertTrue(validator.checkedUris.isEmpty()); + Assertions.assertTrue(validator.checkedJdbcUrls.isEmpty()); + } + + @Test + public void testHmsThriftUriIsValidated() throws Exception { + MetastoreProperties msProps = createHmsProperties("thrift://internal-host:9083"); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", msProps, null, validator); + + Assertions.assertEquals(Arrays.asList("http://internal-host:9083"), validator.checkedUris); + } + + @Test + public void testLoopbackHostIsRejectedWithoutNetworkHook() throws Exception { + MetastoreProperties msProps = createHmsProperties("thrift://127.0.0.1:9083"); + + DdlException ex = Assertions.assertThrows(DdlException.class, + () -> CatalogSsrfChecker.check("cat", msProps, null)); + + Assertions.assertTrue(ex.getMessage().contains("127.0.0.1"), + "message should name the rejected host, was: " + ex.getMessage()); + } + + @Test + public void testCommaSeparatedHmsUrisValidatedIndependently() throws Exception { + MetastoreProperties msProps = createHmsProperties("thrift://h1:9083,thrift://h2:9083"); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", msProps, null, validator); + + Assertions.assertEquals(Arrays.asList("http://h1:9083", "http://h2:9083"), validator.checkedUris); + } + + @Test + public void testIcebergRestUriStripsSchemeAndPath() throws Exception { + Map<String, String> props = new HashMap<>(); + props.put("type", "iceberg"); + props.put("iceberg.catalog.type", "rest"); + props.put("iceberg.rest.uri", "https://internal-host:8181/v1/catalog"); + props.put("warehouse", "s3://w/path"); + MetastoreProperties msProps = MetastoreProperties.create(props); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", msProps, null, validator); + + Assertions.assertEquals(Arrays.asList("http://internal-host:8181"), validator.checkedUris); + } + + @Test + public void testIcebergRestOauth2ServerUriIsValidated() throws Exception { + Map<String, String> props = new HashMap<>(); + props.put("type", "iceberg"); + props.put("iceberg.catalog.type", "rest"); + props.put("iceberg.rest.uri", "https://iceberg-rest.example.com/v1/catalog"); + props.put("iceberg.rest.oauth2.credential", "client:secret"); + props.put("iceberg.rest.oauth2.server-uri", "https://oauth.example.com/token"); + props.put("warehouse", "s3://w/path"); + MetastoreProperties msProps = MetastoreProperties.create(props); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", msProps, null, validator); + + Assertions.assertEquals(Arrays.asList( + "http://iceberg-rest.example.com", "http://oauth.example.com"), validator.checkedUris); + } + + @Test + public void testIcebergJdbcUriUsesJdbcChecker() throws Exception { + Map<String, String> props = new HashMap<>(); + props.put("type", "iceberg"); + props.put("iceberg.catalog.type", "jdbc"); + props.put("iceberg.jdbc.uri", "jdbc:mysql://jdbc-host:3306/iceberg"); + props.put("iceberg.jdbc.catalog_name", "iceberg"); + props.put("warehouse", "s3://w/path"); + MetastoreProperties msProps = MetastoreProperties.create(props); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", msProps, null, validator); + + Assertions.assertEquals(Arrays.asList("jdbc:mysql://jdbc-host:3306/iceberg"), validator.checkedJdbcUrls); + } + + @Test + public void testHdfsDefaultFsIsValidated() throws Exception { + Map<String, String> props = new HashMap<>(); + props.put("fs.defaultFS", "hdfs://nn-host:9000"); + HdfsProperties hdfs = new HdfsProperties(props); + hdfs.initNormalizeAndCheckProps(); + + Map<StorageProperties.Type, StorageProperties> storageMap = new HashMap<>(); + storageMap.put(StorageProperties.Type.HDFS, hdfs); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", null, storageMap, validator); + + Assertions.assertEquals(Arrays.asList("http://nn-host:9000"), validator.checkedUris); + } + + @Test + public void testHdfsHaNamenodeRpcAddressesAreValidated() throws Exception { + Map<String, String> props = new HashMap<>(); + props.put("fs.defaultFS", "hdfs://ns1"); + props.put("dfs.nameservices", "ns1"); + props.put("dfs.ha.namenodes.ns1", "nn1,nn2"); + props.put("dfs.namenode.rpc-address.ns1.nn1", "nn1-host:8020"); + props.put("dfs.namenode.rpc-address.ns1.nn2", "nn2-host:8020"); + props.put("dfs.client.failover.proxy.provider.ns1", + "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"); + HdfsProperties hdfs = new HdfsProperties(props); + hdfs.initNormalizeAndCheckProps(); + + Map<StorageProperties.Type, StorageProperties> storageMap = new HashMap<>(); + storageMap.put(StorageProperties.Type.HDFS, hdfs); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", null, storageMap, validator); + + Assertions.assertEquals(new HashSet<>(Arrays.asList("http://nn1-host:8020", "http://nn2-host:8020")), + new HashSet<>(validator.checkedUris)); + } + + @Test + public void testAzureOauthServerUriIsValidated() throws Exception { + Map<String, String> props = new HashMap<>(); + props.put("type", "iceberg"); + props.put("iceberg.catalog.type", "rest"); + props.put("provider", "azure"); + props.put("azure.auth_type", "OAuth2"); + props.put("azure.endpoint", "https://onelake.dfs.fabric.microsoft.com"); + props.put("azure.oauth2_client_id", "client-id"); + props.put("azure.oauth2_client_secret", "client-secret"); + props.put("azure.oauth2_server_uri", "https://login.microsoftonline.com/tenant/oauth2/token"); + props.put("azure.oauth2_account_host", "onelake.dfs.fabric.microsoft.com"); + StorageProperties azure = StorageProperties.createPrimary(props); + + Map<StorageProperties.Type, StorageProperties> storageMap = new HashMap<>(); + storageMap.put(StorageProperties.Type.AZURE, azure); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", null, storageMap, validator); + + Assertions.assertEquals(Arrays.asList( + "http://onelake.dfs.fabric.microsoft.com", "http://login.microsoftonline.com"), + validator.checkedUris); + } + + @Test + public void testImplicitHdfsStorageIsSkipped() throws Exception { + // explicitlyConfigured=false means auto-created fallback; should be ignored to avoid + // breaking catalogs whose user didn't actually configure HDFS. + Map<String, String> props = new HashMap<>(); + props.put("fs.defaultFS", "hdfs://nn-host:9000"); + HdfsProperties hdfs = new HdfsProperties(props, false); + hdfs.initNormalizeAndCheckProps(); + + Map<StorageProperties.Type, StorageProperties> storageMap = new HashMap<>(); + storageMap.put(StorageProperties.Type.HDFS, hdfs); + RecordingValidator validator = new RecordingValidator(); + + CatalogSsrfChecker.check("cat", null, storageMap, validator); + + Assertions.assertTrue(validator.checkedUris.isEmpty()); + } + + @Test + public void testSecurityCheckerExceptionPropagatesAsDdlException() throws Exception { + MetastoreProperties msProps = createHmsProperties("thrift://forbidden-host:9083"); + RecordingValidator validator = new RecordingValidator(); + validator.uriFailureMessage = "URL points to private IP"; + + DdlException ex = Assertions.assertThrows(DdlException.class, + () -> CatalogSsrfChecker.check("cat", msProps, null, validator)); + + Assertions.assertTrue(ex.getMessage().contains("SSRF check failed"), + "message should explain SSRF failure, was: " + ex.getMessage()); + Assertions.assertTrue(ex.getMessage().contains("forbidden-host"), + "message should name the offending host, was: " + ex.getMessage()); + } + + @Test + public void testEndpointLikeConnectorPropertiesOptIntoSsrfCheck() throws Exception { + Set<String> allowedUncheckedFields = new HashSet<>(Arrays.asList( + "org.apache.doris.datasource.property.storage.AzureProperties#accountHost", + "org.apache.doris.datasource.property.storage.AzureProperties#forceParsingByStandardUrl", + "org.apache.doris.datasource.property.metastore.IcebergJdbcMetaStoreProperties#driverUrl" + )); + for (Class<?> clazz : new Class<?>[] { + org.apache.doris.datasource.property.metastore.AWSGlueMetaStoreBaseProperties.class, + org.apache.doris.datasource.property.metastore.AliyunDLFBaseProperties.class, + org.apache.doris.datasource.property.metastore.HMSBaseProperties.class, + org.apache.doris.datasource.property.metastore.IcebergJdbcMetaStoreProperties.class, + org.apache.doris.datasource.property.metastore.IcebergRestProperties.class, + org.apache.doris.datasource.property.metastore.PaimonRestMetaStoreProperties.class, Review Comment: The audit is still not covering all of the catalog endpoint shapes that this checker relies on. One concrete miss is `PaimonJdbcMetaStoreProperties`: `paimon.catalog.type=jdbc` is registered in `PaimonPropertiesFactory`, its `@ConnectorProperty(names = {"uri", "paimon.jdbc.uri"})` does not set `checkSsrf = true`, and `appendCustomCatalogOptions()` passes that value to `CatalogOptions.URI`, so a Paimon JDBC catalog can still persist an internal JDBC endpoint. There is also a non-annotated shape in `IcebergS3TablesMetaStoreProperties`: it carries raw catalog props through to `buildS3TablesClient()` and applies `S3TablesProperties.S3TABLES_ENDPOINT` via `endpointOverride(...)`, but neither the annotation walk nor this audit list can see it. Please add coverage for these metastore classes/endpoints instead of only auditing the current hand-picked list, and add negative tests proving those endpoints are rejected. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
