Copilot commented on code in PR #63676:
URL: https://github.com/apache/doris/pull/63676#discussion_r3302649679
##########
docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh:
##########
@@ -18,6 +18,9 @@
set -e -x
+. /mnt/scripts/bootstrap/bootstrap-groups.sh
+BOOTSTRAP_GROUPS="$(bootstrap_normalize_groups "${HIVE_BOOTSTRAP_GROUPS:-}")"
Review Comment:
Same failure mode as in `prepare-hive-data.sh`: if
`bootstrap_normalize_groups` errors, the script may continue despite `set -e`
because the failure occurs inside command substitution. Add an explicit `||
exit 1` (or equivalent) to guarantee invalid bootstrap group configurations
halt the container init.
##########
docker/thirdparties/docker-compose/hive/scripts/prepare-hive-data.sh:
##########
@@ -18,110 +18,73 @@ set -eo pipefail
# under the License.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
-# Extract all tar.gz files under the repo
-find ${CUR_DIR}/data -type f -name "*.tar.gz" -print0 | \
-xargs -0 -n1 -P"${LOAD_PARALLEL}" bash -c '
- f="$0"
- echo "Extracting hive data $f"
- dir=$(dirname "$f")
- tar -xzf "$f" -C "$dir"
-'
+. "${CUR_DIR}/bootstrap/bootstrap-groups.sh"
-# download tpch1_data
-if [[ ! -d "${CUR_DIR}/tpch1.db" ]]; then
- echo "${CUR_DIR}/tpch1.db does not exist"
- cd ${CUR_DIR}/
- curl -O
https://${s3BucketName}.${s3Endpoint}/regression/datalake/pipeline_data/tpch1.db.tar.gz
- tar -zxf tpch1.db.tar.gz
- rm -rf tpch1.db.tar.gz
- cd -
-else
- echo "${CUR_DIR}/tpch1.db exist, continue !"
+BOOTSTRAP_GROUPS="$(bootstrap_normalize_groups "${HIVE_BOOTSTRAP_GROUPS:-}")"
Review Comment:
`bootstrap_normalize_groups` can return a non-zero status for unknown
groups, but assigning via command substitution won’t reliably stop the script
under `set -e` (bash’s `errexit` is commonly suppressed in command
substitutions). This can lead to silently continuing with an empty/invalid
`BOOTSTRAP_GROUPS`. Capture/validate the exit code explicitly (e.g., `... ||
exit 1`) so invalid group inputs fail fast.
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+ private static final Logger LOG =
LogManager.getLogger(JdbcDriverLoader.class);
+ private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new
ConcurrentHashMap<>();
+ private static final Set<String> REGISTERED_DRIVER_KEYS =
ConcurrentHashMap.newKeySet();
+
+ private JdbcDriverLoader() {
+ }
+
+ public static String validateDriverChecksum(String driverUrl, String
expectedChecksum) throws DdlException {
+ String actualChecksum = JdbcResource.computeObjectChecksum(driverUrl);
+ if (StringUtils.isNotBlank(expectedChecksum) &&
!expectedChecksum.equals(actualChecksum)) {
+ throw new DdlException("The provided checksum (" + expectedChecksum
+ + ") does not match the computed checksum (" +
actualChecksum
+ + ") for the driver_url.");
+ }
+ return actualChecksum;
+ }
+
+ public static String registerDriver(String driverUrl, String
driverClassName, String expectedChecksum,
+ ClassLoader parentClassLoader) {
+ if (StringUtils.isBlank(driverClassName)) {
+ throw new IllegalArgumentException("JDBC driver class is required
when driver url is specified.");
+ }
+
+ try {
+ validateDriverChecksum(driverUrl, expectedChecksum);
+ } catch (DdlException e) {
+ throw new IllegalArgumentException(e.getMessage(), e);
+ }
+
+ try {
+ String fullDriverUrl = JdbcResource.getFullDriverUrl(driverUrl);
Review Comment:
Checksum validation is performed on `driverUrl`, but the actual driver is
loaded from `fullDriverUrl` after normalization via
`JdbcResource.getFullDriverUrl`. If normalization changes the resolved resource
(e.g., alternative schemes/canonicalization), you could validate one URL but
load another. Validate the checksum against the same resolved/normalized URL
(`fullDriverUrl`) that will be used for the `URLClassLoader`.
##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
Review Comment:
This introduces a dependency on Log4j 1.x (`org.apache.log4j.Logger`), which
is EOL and has known security/maintenance issues. If the module supports it,
switch to the project’s standard logging facade (often SLF4J + Log4j2) to avoid
carrying Log4j 1.x in new code.
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+ private static final Logger LOG =
LogManager.getLogger(JdbcDriverLoader.class);
+ private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new
ConcurrentHashMap<>();
+ private static final Set<String> REGISTERED_DRIVER_KEYS =
ConcurrentHashMap.newKeySet();
+
+ private JdbcDriverLoader() {
+ }
+
+ public static String validateDriverChecksum(String driverUrl, String
expectedChecksum) throws DdlException {
+ String actualChecksum = JdbcResource.computeObjectChecksum(driverUrl);
+ if (StringUtils.isNotBlank(expectedChecksum) &&
!expectedChecksum.equals(actualChecksum)) {
+ throw new DdlException("The provided checksum (" + expectedChecksum
+ + ") does not match the computed checksum (" +
actualChecksum
+ + ") for the driver_url.");
+ }
+ return actualChecksum;
+ }
+
+ public static String registerDriver(String driverUrl, String
driverClassName, String expectedChecksum,
+ ClassLoader parentClassLoader) {
+ if (StringUtils.isBlank(driverClassName)) {
+ throw new IllegalArgumentException("JDBC driver class is required
when driver url is specified.");
+ }
+
+ try {
+ validateDriverChecksum(driverUrl, expectedChecksum);
+ } catch (DdlException e) {
+ throw new IllegalArgumentException(e.getMessage(), e);
+ }
+
+ try {
+ String fullDriverUrl = JdbcResource.getFullDriverUrl(driverUrl);
Review Comment:
Checksum validation is performed on `driverUrl`, but the actual driver is
loaded from `fullDriverUrl` after normalization via
`JdbcResource.getFullDriverUrl`. If normalization changes the resolved resource
(e.g., alternative schemes/canonicalization), you could validate one URL but
load another. Validate the checksum against the same resolved/normalized URL
(`fullDriverUrl`) that will be used for the `URLClassLoader`.
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+ private static final Logger LOG =
LogManager.getLogger(JdbcDriverLoader.class);
+ private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new
ConcurrentHashMap<>();
+ private static final Set<String> REGISTERED_DRIVER_KEYS =
ConcurrentHashMap.newKeySet();
+
+ private JdbcDriverLoader() {
+ }
+
+ public static String validateDriverChecksum(String driverUrl, String
expectedChecksum) throws DdlException {
+ String actualChecksum = JdbcResource.computeObjectChecksum(driverUrl);
+ if (StringUtils.isNotBlank(expectedChecksum) &&
!expectedChecksum.equals(actualChecksum)) {
+ throw new DdlException("The provided checksum (" + expectedChecksum
+ + ") does not match the computed checksum (" +
actualChecksum
+ + ") for the driver_url.");
+ }
+ return actualChecksum;
+ }
+
+ public static String registerDriver(String driverUrl, String
driverClassName, String expectedChecksum,
+ ClassLoader parentClassLoader) {
+ if (StringUtils.isBlank(driverClassName)) {
+ throw new IllegalArgumentException("JDBC driver class is required
when driver url is specified.");
+ }
+
+ try {
+ validateDriverChecksum(driverUrl, expectedChecksum);
+ } catch (DdlException e) {
+ throw new IllegalArgumentException(e.getMessage(), e);
+ }
+
+ try {
+ String fullDriverUrl = JdbcResource.getFullDriverUrl(driverUrl);
+ URL url = new URL(fullDriverUrl);
+ String driverKey = fullDriverUrl + "#" + driverClassName;
+ if (!REGISTERED_DRIVER_KEYS.add(driverKey)) {
+ LOG.info("JDBC driver already registered: {} from {}",
driverClassName, fullDriverUrl);
+ return fullDriverUrl;
+ }
+ try {
+ ClassLoader classLoader =
DRIVER_CLASS_LOADER_CACHE.computeIfAbsent(url, u ->
+ URLClassLoader.newInstance(new URL[] {u},
parentClassLoader));
Review Comment:
Caching `URLClassLoader` instances in a static map without any
eviction/close path can leak resources (open JAR handles on some platforms,
class metadata over time, and permanent retention of downloaded drivers).
Consider adding an explicit unregister/close mechanism (and removing from the
cache), or use a bounded/weak cache so long-running processes don’t accumulate
classloaders indefinitely.
##########
fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcDriverLoaderTest.java:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.FeConstants;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+public class JdbcDriverLoaderTest {
+ private static final String DRIVER_BYTES = "doris-jdbc-driver";
+ private static final String DRIVER_CHECKSUM =
"ef2b3218a863c98bc78fdf447331b206";
+
+ @Test
+ public void testValidateDriverChecksumRejectsMismatch() throws Exception {
+ boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+ FeConstants.runningUnitTest = false;
+ try {
+ String driverUrl = createDriverUrl();
+
+ DdlException exception = Assert.assertThrows(DdlException.class,
+ () -> JdbcDriverLoader.validateDriverChecksum(driverUrl,
"bad-checksum"));
+ Assert.assertTrue(exception.getMessage().contains("does not match
the computed checksum"));
+ } finally {
+ FeConstants.runningUnitTest = oldRunningUnitTest;
+ }
+ }
+
+ @Test
+ public void testRegisterDriverValidatesChecksumBeforeLoadingClass() throws
Exception {
+ boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+ FeConstants.runningUnitTest = false;
+ try {
+ String driverUrl = createDriverUrl();
+
+ IllegalArgumentException exception =
Assert.assertThrows(IllegalArgumentException.class,
+ () -> JdbcDriverLoader.registerDriver(driverUrl,
+ "org.apache.doris.catalog.NotExistingDriver",
"bad-checksum",
+ getClass().getClassLoader()));
+ Assert.assertTrue(exception.getMessage().contains("does not match
the computed checksum"));
+ } finally {
+ FeConstants.runningUnitTest = oldRunningUnitTest;
+ }
+ }
+
+ @Test
+ public void testValidateDriverChecksumReturnsComputedChecksum() throws
Exception {
+ boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+ FeConstants.runningUnitTest = false;
+ try {
+ String driverUrl = createDriverUrl();
+
+ Assert.assertEquals(DRIVER_CHECKSUM,
JdbcDriverLoader.validateDriverChecksum(driverUrl, DRIVER_CHECKSUM));
+ Assert.assertEquals(DRIVER_CHECKSUM,
JdbcDriverLoader.validateDriverChecksum(driverUrl, ""));
+ } finally {
+ FeConstants.runningUnitTest = oldRunningUnitTest;
Review Comment:
These tests mutate a global static flag (`FeConstants.runningUnitTest`). If
the test runner executes tests in parallel, this can cause cross-test
interference/flakiness. To make this robust, guard the mutation with a shared
lock (e.g., synchronize on `FeConstants.class` around the whole test body), or
refactor the production code to avoid relying on mutable global state for
checksum behavior in tests.
##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URLConnection;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverUtils {
+ private static final Logger LOG = Logger.getLogger(JdbcDriverUtils.class);
+ private static final int HTTP_TIMEOUT_MS = 10000;
+ private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new
ConcurrentHashMap<>();
+ private static final Set<String> REGISTERED_DRIVER_KEYS =
ConcurrentHashMap.newKeySet();
+
+ private JdbcDriverUtils() {
+ }
+
+ public static String registerDriver(String driverUrl, String
driverClassName, ClassLoader parentClassLoader) {
+ return registerDriver(driverUrl, driverClassName, "",
parentClassLoader);
+ }
+
+ public static String registerDriver(String driverUrl, String
driverClassName, String expectedChecksum,
+ ClassLoader parentClassLoader) {
+ if (StringUtils.isBlank(driverClassName)) {
+ throw new IllegalArgumentException("JDBC driver class is required
when driver url is specified.");
+ }
+ validateDriverChecksum(driverUrl, expectedChecksum);
+
+ try {
+ URL url = new URL(driverUrl);
+ String driverKey = driverUrl + "#" + driverClassName;
+ if (!REGISTERED_DRIVER_KEYS.add(driverKey)) {
+ LOG.info("JDBC driver already registered: " + driverClassName
+ " from " + driverUrl);
+ return driverUrl;
+ }
+ try {
+ ClassLoader classLoader =
DRIVER_CLASS_LOADER_CACHE.computeIfAbsent(url, u ->
+ URLClassLoader.newInstance(new URL[] {u},
parentClassLoader));
Review Comment:
Same resource-retention issue as the FE loader: caching `URLClassLoader`
without a removal/close lifecycle can leak JAR file handles and memory. Provide
a way to close/remove cached loaders when drivers are no longer needed (or use
a weak/bounded cache) to prevent unbounded growth.
##########
docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh:
##########
@@ -73,68 +76,99 @@ fi
hadoop fs -mkdir -p /user/doris/suites/
DATA_DIR="/mnt/scripts/data/"
-find "${DATA_DIR}" -type f -name "run.sh" -print0 | xargs -0 -n 1 -P
"${LOAD_PARALLEL}" -I {} bash -ec '
- START_TIME=$(date +%s)
- bash -e "{}" || (echo "Failed to executing script: {}" && exit 1)
- END_TIME=$(date +%s)
- EXECUTION_TIME=$((END_TIME - START_TIME))
- echo "Script: {} executed in $EXECUTION_TIME seconds"
-'
+run_scripts=()
+while IFS= read -r -d '' run_script; do
+ relative_run_script="${run_script#/mnt/scripts/}"
+ if bootstrap_item_selected "${BOOTSTRAP_GROUPS}" "run_sh"
"${relative_run_script}"; then
+ run_scripts+=("${run_script}")
+ fi
+done < <(find "${DATA_DIR}" -type f -name "run.sh" -print0)
+
+if (( ${#run_scripts[@]} > 0 )); then
+ printf '%s\0' "${run_scripts[@]}" | xargs -0 -P "${LOAD_PARALLEL}" -I {}
bash -ec '
+ START_TIME=$(date +%s)
+ bash -e "{}" || (echo "Failed to executing script: {}" && exit 1)
+ END_TIME=$(date +%s)
+ EXECUTION_TIME=$((END_TIME - START_TIME))
+ echo "Script: {} executed in $EXECUTION_TIME seconds"
+ '
+fi
# put data file
hadoop_put_pids=()
+hadoop_put_paths=()
hadoop fs -mkdir -p /user/doris/
+copy_to_hdfs_if_selected() {
+ local relative_path="$1"
+ local local_path="/mnt/scripts/${relative_path}"
+
+ if ! bootstrap_item_selected "${BOOTSTRAP_GROUPS}" "hdfs_dir"
"${relative_path}"; then
+ return
+ fi
+
+ if [[ ! -e "${local_path}" ]]; then
+ echo "${local_path} does not exist"
+ exit 1
+ fi
+
+ if [[ -d "${local_path}" && -z "$(ls "${local_path}")" ]]; then
+ echo "${local_path} does not exist"
Review Comment:
When the directory exists but is empty, the message `does not exist` is
incorrect and makes troubleshooting harder. Update the message to reflect the
real condition (e.g., 'is empty' or 'contains no files') so failures are
actionable.
##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URLConnection;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverUtils {
+ private static final Logger LOG = Logger.getLogger(JdbcDriverUtils.class);
+ private static final int HTTP_TIMEOUT_MS = 10000;
+ private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new
ConcurrentHashMap<>();
Review Comment:
Same resource-retention issue as the FE loader: caching `URLClassLoader`
without a removal/close lifecycle can leak JAR file handles and memory. Provide
a way to close/remove cached loaders when drivers are no longer needed (or use
a weak/bounded cache) to prevent unbounded growth.
##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URLConnection;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverUtils {
+ private static final Logger LOG = Logger.getLogger(JdbcDriverUtils.class);
+ private static final int HTTP_TIMEOUT_MS = 10000;
+ private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new
ConcurrentHashMap<>();
+ private static final Set<String> REGISTERED_DRIVER_KEYS =
ConcurrentHashMap.newKeySet();
+
+ private JdbcDriverUtils() {
+ }
+
+ public static String registerDriver(String driverUrl, String
driverClassName, ClassLoader parentClassLoader) {
+ return registerDriver(driverUrl, driverClassName, "",
parentClassLoader);
+ }
+
+ public static String registerDriver(String driverUrl, String
driverClassName, String expectedChecksum,
+ ClassLoader parentClassLoader) {
+ if (StringUtils.isBlank(driverClassName)) {
+ throw new IllegalArgumentException("JDBC driver class is required
when driver url is specified.");
+ }
+ validateDriverChecksum(driverUrl, expectedChecksum);
+
+ try {
+ URL url = new URL(driverUrl);
+ String driverKey = driverUrl + "#" + driverClassName;
+ if (!REGISTERED_DRIVER_KEYS.add(driverKey)) {
+ LOG.info("JDBC driver already registered: " + driverClassName
+ " from " + driverUrl);
+ return driverUrl;
+ }
+ try {
+ ClassLoader classLoader =
DRIVER_CLASS_LOADER_CACHE.computeIfAbsent(url, u ->
+ URLClassLoader.newInstance(new URL[] {u},
parentClassLoader));
+ Class<?> loadedDriverClass = Class.forName(driverClassName,
true, classLoader);
+ Driver driver = (Driver)
loadedDriverClass.getDeclaredConstructor().newInstance();
+ DriverManager.registerDriver(new DriverShim(driver));
+ LOG.info("Successfully registered JDBC driver: " +
driverClassName + " from " + driverUrl);
+ return driverUrl;
+ } catch (ClassNotFoundException e) {
+ REGISTERED_DRIVER_KEYS.remove(driverKey);
+ throw new IllegalArgumentException("Failed to load JDBC driver
class: " + driverClassName, e);
+ } catch (Exception e) {
+ REGISTERED_DRIVER_KEYS.remove(driverKey);
+ throw new RuntimeException("Failed to register JDBC driver: "
+ driverClassName, e);
+ }
+ } catch (MalformedURLException e) {
+ throw new IllegalArgumentException("Invalid driver URL: " +
driverUrl, e);
+ }
+ }
+
+ public static String validateDriverChecksum(String driverUrl, String
expectedChecksum) {
+ String actualChecksum = computeObjectChecksum(driverUrl);
+ if (StringUtils.isNotBlank(expectedChecksum) &&
!expectedChecksum.equals(actualChecksum)) {
+ throw new IllegalArgumentException("Checksum mismatch for JDBC
driver. expected: "
+ + expectedChecksum + ", actual: " + actualChecksum);
+ }
+ return actualChecksum;
+ }
+
+ public static String computeObjectChecksum(String urlStr) {
+ try (InputStream inputStream = getInputStreamFromUrl(urlStr,
HTTP_TIMEOUT_MS, HTTP_TIMEOUT_MS)) {
+ MessageDigest digest = MessageDigest.getInstance("MD5");
Review Comment:
Using MD5 for integrity validation is collision-prone and not appropriate if
this checksum is intended to provide tamper resistance for driver artifacts.
Prefer SHA-256 (or stronger) and update the expected checksum format
accordingly; if MD5 is intentionally used only for corruption detection,
document that this is not a security guarantee.
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+ private static final Logger LOG =
LogManager.getLogger(JdbcDriverLoader.class);
+ private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new
ConcurrentHashMap<>();
Review Comment:
Caching `URLClassLoader` instances in a static map without any
eviction/close path can leak resources (open JAR handles on some platforms,
class metadata over time, and permanent retention of downloaded drivers).
Consider adding an explicit unregister/close mechanism (and removing from the
cache), or use a bounded/weak cache so long-running processes don’t accumulate
classloaders indefinitely.
##########
regression-test/pipeline/common/get-hive-bootstrap-groups.sh:
##########
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eo pipefail
+
+usage() {
+ echo "Usage: $0 <hive2|hive3|both|all>" >&2
+ exit 1
+}
+
+case "${1:-}" in
+hive2)
+ echo "common,hive2_only"
+ ;;
+hive3)
+ echo "common,hive3_only"
+ ;;
+both)
+ echo "common,hive2_only,hive3_only"
+ ;;
+all)
+ echo "all"
+ ;;
Review Comment:
This script hardcodes the same group semantics that are also implemented in
`docker/.../bootstrap/bootstrap-groups.sh`. Keeping two sources of truth
increases the risk of divergence (e.g., adding a new group later). Consider
reusing the shared normalization/merge logic (or a shared list/config) so group
definitions live in one place.
##########
fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcDriverLoaderTest.java:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.FeConstants;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+public class JdbcDriverLoaderTest {
+ private static final String DRIVER_BYTES = "doris-jdbc-driver";
+ private static final String DRIVER_CHECKSUM =
"ef2b3218a863c98bc78fdf447331b206";
+
+ @Test
+ public void testValidateDriverChecksumRejectsMismatch() throws Exception {
+ boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+ FeConstants.runningUnitTest = false;
+ try {
+ String driverUrl = createDriverUrl();
+
+ DdlException exception = Assert.assertThrows(DdlException.class,
+ () -> JdbcDriverLoader.validateDriverChecksum(driverUrl,
"bad-checksum"));
+ Assert.assertTrue(exception.getMessage().contains("does not match
the computed checksum"));
+ } finally {
+ FeConstants.runningUnitTest = oldRunningUnitTest;
+ }
+ }
+
+ @Test
+ public void testRegisterDriverValidatesChecksumBeforeLoadingClass() throws
Exception {
+ boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+ FeConstants.runningUnitTest = false;
+ try {
+ String driverUrl = createDriverUrl();
+
+ IllegalArgumentException exception =
Assert.assertThrows(IllegalArgumentException.class,
+ () -> JdbcDriverLoader.registerDriver(driverUrl,
+ "org.apache.doris.catalog.NotExistingDriver",
"bad-checksum",
+ getClass().getClassLoader()));
+ Assert.assertTrue(exception.getMessage().contains("does not match
the computed checksum"));
+ } finally {
+ FeConstants.runningUnitTest = oldRunningUnitTest;
+ }
+ }
+
+ @Test
+ public void testValidateDriverChecksumReturnsComputedChecksum() throws
Exception {
+ boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+ FeConstants.runningUnitTest = false;
+ try {
+ String driverUrl = createDriverUrl();
+
+ Assert.assertEquals(DRIVER_CHECKSUM,
JdbcDriverLoader.validateDriverChecksum(driverUrl, DRIVER_CHECKSUM));
+ Assert.assertEquals(DRIVER_CHECKSUM,
JdbcDriverLoader.validateDriverChecksum(driverUrl, ""));
+ } finally {
+ FeConstants.runningUnitTest = oldRunningUnitTest;
+ }
+ }
+
+ private String createDriverUrl() throws Exception {
+ Path driverPath = Files.createTempFile("doris-jdbc-driver", ".jar");
+ Files.write(driverPath, DRIVER_BYTES.getBytes(StandardCharsets.UTF_8));
+ return "file://" + driverPath.toAbsolutePath();
Review Comment:
The temp file is never deleted, and the file URL is built via string
concatenation (which can produce invalid URLs for paths needing escaping).
Prefer using `driverPath.toUri().toString()` (or `toUri().toURL().toString()`)
and ensure the temp file is cleaned up (e.g., `deleteOnExit()` or explicit
deletion in a `finally`) to avoid polluting the test environment.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]