This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 1eada5729dd branch-4.0: [fix](build) Backport JFS support from
branch-4.1 (#62927)
1eada5729dd is described below
commit 1eada5729ddcd4cbfae7d9b23f7d24cf87e1ef43
Author: Chenjunwei <[email protected]>
AuthorDate: Sat May 9 15:52:50 2026 +0800
branch-4.0: [fix](build) Backport JFS support from branch-4.1 (#62927)
### What problem does this PR solve?
Issue Number: None
Related PR: #61706, #62157, #62732
Problem Summary: Backport the JFS (JuiceFS) support already present on
branch-4.1 to branch-4.0. This change makes FE treat JFS as an
HDFS-compatible filesystem, packages the JuiceFS Hadoop jar into the FE/BE
outputs, teaches the thirdparty download/build scripts to try the internal
JuiceFS mirror first and fall back to Maven Central, and adds Hive docker
helpers plus a regression case for JFS reads through an HMS (Hive
Metastore) catalog.
### Release note
Branch-4.0 gains JFS HMS catalog compatibility plus JuiceFS runtime
packaging and download support.
### Check List (For Author)
- Test: FE unit tests and script tests
- Unit Test / Manual test
- Behavior changed: Yes (branch-4.0 now treats JFS as HDFS-compatible
and packages JuiceFS runtime jars)
- Does this need documentation: No
---
build.sh | 27 +-
.../docker-compose/hive/hadoop-hive-3x.env.tpl | 3 +-
.../docker-compose/hive/hadoop-hive.env.tpl | 3 +-
.../docker-compose/hive/hive-2x_settings.env | 3 +
.../docker-compose/hive/hive-3x.yaml.tpl | 1 +
.../docker-compose/hive/hive-3x_settings.env | 5 +-
.../docker-compose/hive/scripts/hive-metastore.sh | 14 +-
docker/thirdparties/juicefs-helpers.sh | 301 +++++++++++++++++++++
docker/thirdparties/run-thirdparties-docker.sh | 19 ++
.../test/juicefs-helpers-mirror-test.sh | 68 +++++
.../java/org/apache/doris/analysis/BrokerDesc.java | 2 +-
.../org/apache/doris/analysis/OutFileClause.java | 6 +-
.../java/org/apache/doris/catalog/Resource.java | 3 +
.../property/storage/HdfsProperties.java | 5 +-
.../java/org/apache/doris/fs/SchemaTypeMapper.java | 4 +-
.../apache/doris/common/util/LocationPathTest.java | 6 +-
.../property/storage/HdfsPropertiesTest.java | 20 ++
.../property/storage/HdfsPropertiesUtilsTest.java | 18 +-
.../plans/commands/CreateResourceCommandTest.java | 5 +
.../test_jfs_hms_catalog_read.groovy | 141 ++++++++++
thirdparty/build-thirdparty.sh | 11 +
thirdparty/download-thirdparty.sh | 30 ++
.../test/download-thirdparty-fallback-test.sh | 99 +++++++
thirdparty/vars.sh | 7 +
24 files changed, 780 insertions(+), 21 deletions(-)
diff --git a/build.sh b/build.sh
index 4e3726d34c5..eff6b482ec8 100755
--- a/build.sh
+++ b/build.sh
@@ -65,9 +65,10 @@ Usage: $0 <options>
USE_AVX2 If the CPU does not support AVX2 instruction
set, please set USE_AVX2=0. Default is ON.
ARM_MARCH Specify the ARM architecture instruction set.
Default is armv8-a+crc.
STRIP_DEBUG_INFO If set STRIP_DEBUG_INFO=ON, the debug
information in the compiled binaries will be stored separately in the
'be/lib/debug_info' directory. Default is OFF.
- DISABLE_BE_JAVA_EXTENSIONS If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will
do not build binary with java-udf,hadoop-hudi-scanner,jdbc-scanner and so on
Default is OFF.
- DISABLE_JAVA_CHECK_STYLE If set DISABLE_JAVA_CHECK_STYLE=ON, it will
skip style check of java code in FE.
- DISABLE_BUILD_AZURE If set DISABLE_BUILD_AZURE=ON, it will not
build azure into BE.
+ DISABLE_BE_JAVA_EXTENSIONS If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will
do not build binary with java-udf,hadoop-hudi-scanner,jdbc-scanner and so on
Default is OFF.
+ DISABLE_JAVA_CHECK_STYLE If set DISABLE_JAVA_CHECK_STYLE=ON, it will
skip style check of java code in FE.
+ DISABLE_BUILD_AZURE If set DISABLE_BUILD_AZURE=ON, it will not
build azure into BE.
+ DISABLE_BUILD_JUICEFS If set DISABLE_BUILD_JUICEFS=ON, it will skip
packaging juicefs-hadoop jar into FE/BE output.
Eg.
$0 build all
@@ -474,6 +475,13 @@ if [[ "$(echo "${DISABLE_BUILD_AZURE}" | tr '[:lower:]'
'[:upper:]')" == "ON" ]]
BUILD_AZURE='OFF'
fi
+if [[ "$(echo "${DISABLE_BUILD_JUICEFS}" | tr '[:lower:]' '[:upper:]')" ==
"ON" ]]; then
+ BUILD_JUICEFS='OFF'
+else
+ BUILD_JUICEFS='ON'
+fi
+export DISABLE_BUILD_JUICEFS
+
if [[ -z "${ENABLE_INJECTION_POINT}" ]]; then
ENABLE_INJECTION_POINT='OFF'
fi
@@ -522,6 +530,7 @@ echo "Get params:
BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS}
BUILD_BE_CDC_CLIENT -- ${BUILD_BE_CDC_CLIENT}
BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF}
+ BUILD_JUICEFS -- ${BUILD_JUICEFS}
PARALLEL -- ${PARALLEL}
CLEAN -- ${CLEAN}
GLIBC_COMPATIBILITY -- ${GLIBC_COMPATIBILITY}
@@ -796,7 +805,7 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
cp -r -p "${DORIS_HOME}/conf/ldap.conf" "${DORIS_OUTPUT}/fe/conf"/
cp -r -p "${DORIS_HOME}/conf/mysql_ssl_default_certificate"
"${DORIS_OUTPUT}/fe/"/
rm -rf "${DORIS_OUTPUT}/fe/lib"/*
- install -d "${DORIS_OUTPUT}/fe/lib/jindofs"
+ install -d "${DORIS_OUTPUT}/fe/lib/jindofs"
"${DORIS_OUTPUT}/fe/lib/juicefs"
cp -r -p "${DORIS_HOME}/fe/fe-core/target/lib"/* "${DORIS_OUTPUT}/fe/lib"/
cp -r -p "${DORIS_HOME}/fe/fe-core/target/doris-fe.jar"
"${DORIS_OUTPUT}/fe/lib"/
if [[ "${WITH_TDE_DIR}" != "" ]]; then
@@ -815,6 +824,10 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
cp -r -p
"${DORIS_THIRDPARTY}"/installed/jindofs_libs/jindo-sdk-[0-9]*.jar
"${DORIS_OUTPUT}/fe/lib/jindofs"/
fi
+ if [[ "${BUILD_JUICEFS}" == "ON" ]]; then
+ cp -r -p
"${DORIS_THIRDPARTY}"/installed/juicefs_libs/juicefs-hadoop-[0-9]*.jar
"${DORIS_OUTPUT}/fe/lib/juicefs"/
+ fi
+
cp -r -p "${DORIS_HOME}/minidump" "${DORIS_OUTPUT}/fe"/
cp -r -p "${DORIS_HOME}/webroot/static" "${DORIS_OUTPUT}/fe/webroot"/
@@ -981,7 +994,7 @@ EOF
done
# copy jindofs jars, only support for Linux x64 or arm
- install -d "${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
+ install -d "${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
"${DORIS_OUTPUT}/be/lib/java_extensions/juicefs"/
if [[ "${TARGET_SYSTEM}" == 'Linux' ]] && [[ "$TARGET_ARCH" == 'x86_64'
]]; then
cp -r -p
"${DORIS_THIRDPARTY}"/installed/jindofs_libs/jindo-core-[0-9]*.jar
"${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
cp -r -p
"${DORIS_THIRDPARTY}"/installed/jindofs_libs/jindo-core-linux-ubuntu22-x86_64-[0-9]*.jar
"${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
@@ -991,6 +1004,10 @@ EOF
cp -r -p
"${DORIS_THIRDPARTY}"/installed/jindofs_libs/jindo-sdk-[0-9]*.jar
"${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
fi
+ if [[ "${BUILD_JUICEFS}" == "ON" ]]; then
+ cp -r -p
"${DORIS_THIRDPARTY}"/installed/juicefs_libs/juicefs-hadoop-[0-9]*.jar
"${DORIS_OUTPUT}/be/lib/java_extensions/juicefs"/
+ fi
+
cp -r -p "${DORIS_THIRDPARTY}/installed/webroot"/*
"${DORIS_OUTPUT}/be/www"/
copy_common_files "${DORIS_OUTPUT}/be/"
mkdir -p "${DORIS_OUTPUT}/be/log"
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
index 967da876ec6..24fb144d0e0 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
@@ -22,6 +22,8 @@
HIVE_SITE_CONF_hive_metastore_transactional_event_listeners=org.apache.hive.hcat
HIVE_SITE_CONF_hive_stats_column_autogather=false
HIVE_SITE_CONF_fs_s3_impl=org.apache.hadoop.fs.s3a.S3AFileSystem
HIVE_SITE_CONF_fs_s3a_impl=org.apache.hadoop.fs.s3a.S3AFileSystem
+HIVE_SITE_CONF_fs_jfs_impl=io.juicefs.JuiceFileSystem
+HIVE_SITE_CONF_juicefs_cluster_meta=${JFS_CLUSTER_META}
HIVE_SITE_CONF_fs_s3a_access_key=${AWSAk}
HIVE_SITE_CONF_fs_s3a_secret_key=${AWSSk}
HIVE_SITE_CONF_fs_s3a_endpoint=${AWSEndpoint}
@@ -50,4 +52,3 @@
HIVE_SITE_CONF_fs_gs_auth_service_account_private_key_id=${GCSAccountPrivateKeyI
HIVE_SITE_CONF_fs_gs_auth_service_account_private_key=${GCSAccountPrivateKey}
HIVE_SITE_CONF_fs_gs_proxy_address=${GCSProxyAddress}
enablePaimonHms=${enablePaimonHms}
-
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
index ecbf735216b..d48d497bafa 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
@@ -32,6 +32,8 @@ HIVE_SITE_CONF_hive_stats_column_autogather=false
HIVE_SITE_CONF_hive_exec_parallel=true
CORE_CONF_fs_defaultFS=hdfs://${IP_HOST}:${FS_PORT}
+CORE_CONF_fs_jfs_impl=io.juicefs.JuiceFileSystem
+CORE_CONF_juicefs_cluster_meta=${JFS_CLUSTER_META}
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
@@ -62,4 +64,3 @@ HADOOP_HEAPSIZE=4096
NEED_LOAD_DATA=${NEED_LOAD_DATA}
LOAD_PARALLEL=${LOAD_PARALLEL}
-
diff --git a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
index 9045bb91683..7db93749207 100644
--- a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
+++ b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
@@ -24,3 +24,6 @@ export FS_PORT=8020 # should be same as hive2HmsPort in
regression-conf.groovy
export HMS_PORT=9083 # should be same as hive2HmsPort in regression-conf.groovy
export HS_PORT=10000 # should be same as hive2ServerPort in
regression-conf.groovy
export PG_PORT=5432 # should be same as hive2PgPort in regression-conf.groovy
+
+# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
+export
JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"
diff --git a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
index 5118b6bd65d..d6e4b1cfba5 100644
--- a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
@@ -92,6 +92,7 @@ services:
- "${HMS_PORT}"
volumes:
- ./scripts:/mnt/scripts
+ - /tmp/jfs-bucket:/tmp/jfs-bucket
depends_on:
hive-metastore-postgresql:
condition: service_healthy
diff --git a/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
b/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
index bf2bc4424f8..60db1e93951 100644
--- a/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
+++ b/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
@@ -25,6 +25,9 @@ export HMS_PORT=9383 # should be same as hive3HmsPort in
regression-conf.groovy
export HS_PORT=13000 # should be same as hive3ServerPort in
regression-conf.groovy
export PG_PORT=5732 # should be same as hive3PgPort in regression-conf.groovy
+# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
+export
JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"
+
# prepare for paimon hms test,control load paimon hms data or not
export enablePaimonHms="false"
# hms on s3/oss/obs/cos
@@ -44,4 +47,4 @@ export GCSProjectId=""
export GCSAccountEmail=""
export GCSAccountPrivateKeyId=""
export GCSAccountPrivateKey=""
-export GCSProxyAddress=""
\ No newline at end of file
+export GCSProxyAddress=""
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index d2ac9fa17a1..69d5af071b7 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -27,9 +27,21 @@ for file in "${AUX_LIB}"/*.tar.gz; do
done
ls "${AUX_LIB}/"
-# copy auxiliary jars to hive lib, avoid jars copy
+# Keep existing behavior for Hive metastore classpath.
cp -r "${AUX_LIB}"/* /opt/hive/lib/
+# Add JuiceFS jar into Hadoop classpath for `hadoop fs jfs://...`.
+shopt -s nullglob
+juicefs_jars=("${AUX_LIB}"/juicefs-hadoop-*.jar)
+if (( ${#juicefs_jars[@]} > 0 )); then
+ for target in /opt/hadoop-3.2.1/share/hadoop/common/lib
/opt/hadoop/share/hadoop/common/lib; do
+ if [[ -d "${target}" ]]; then
+ cp -f "${juicefs_jars[@]}" "${target}"/
+ fi
+ done
+fi
+shopt -u nullglob
+
# start metastore
nohup /opt/hive/bin/hive --service metastore &
diff --git a/docker/thirdparties/juicefs-helpers.sh
b/docker/thirdparties/juicefs-helpers.sh
new file mode 100644
index 00000000000..a451b2547f2
--- /dev/null
+++ b/docker/thirdparties/juicefs-helpers.sh
@@ -0,0 +1,301 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Shared JuiceFS helper functions used by build and docker scripts.
+
+JUICEFS_DEFAULT_VERSION="${JUICEFS_DEFAULT_VERSION:-1.3.1}"
+MAVEN_REPOSITORY_URL="${MAVEN_REPOSITORY_URL:-https://repo1.maven.org/maven2}"
+JUICEFS_THIRDPARTY_REPOSITORY_URL="${JUICEFS_THIRDPARTY_REPOSITORY_URL:-}"
+JUICEFS_DEFAULT_THIRDPARTY_REPOSITORY_URL="${JUICEFS_DEFAULT_THIRDPARTY_REPOSITORY_URL:-}"
+JUICEFS_HADOOP_MAVEN_REPO="${JUICEFS_HADOOP_MAVEN_REPO:-${MAVEN_REPOSITORY_URL}/io/juicefs/juicefs-hadoop}"
+JUICEFS_CLI_RELEASE_REPO="${JUICEFS_CLI_RELEASE_REPO:-https://github.com/juicedata/juicefs/releases/download}"
+
+juicefs_default_thirdparty_repository_url() {
+ if [[ -n "${JUICEFS_DEFAULT_THIRDPARTY_REPOSITORY_URL}" ]]; then
+ echo "${JUICEFS_DEFAULT_THIRDPARTY_REPOSITORY_URL%/}"
+ return 0
+ fi
+ if [[ -n "${s3BucketName:-}" && -n "${s3Endpoint:-}" ]]; then
+ echo
"https://${s3BucketName}.${s3Endpoint}/regression/datalake/thirdparty/juicefs"
+ return 0
+ fi
+ echo "https://doris-thirdparty-repo.bj.bcebos.com/thirdparty"
+}
+
+juicefs_thirdparty_repository_url() {
+ local
repository_url="${JUICEFS_THIRDPARTY_REPOSITORY_URL:-${REPOSITORY_URL:-}}"
+ if [[ -z "${repository_url}" ]]; then
+ repository_url=$(juicefs_default_thirdparty_repository_url)
+ fi
+ echo "${repository_url%/}"
+}
+
+juicefs_repository_file_url() {
+ local filename="$1"
+ echo "$(juicefs_thirdparty_repository_url)/${filename}"
+}
+
+juicefs_find_hadoop_jar_by_globs() {
+ local jar_glob=""
+ local matched_jar=""
+ for jar_glob in "$@"; do
+ matched_jar=$(compgen -G "${jar_glob}" | head -n 1 || true)
+ if [[ -n "${matched_jar}" ]]; then
+ echo "${matched_jar}"
+ return 0
+ fi
+ done
+ return 1
+}
+
+juicefs_detect_hadoop_version() {
+ local juicefs_jar="$1"
+ local default_version="${2:-${JUICEFS_DEFAULT_VERSION}}"
+ if [[ -z "${juicefs_jar}" ]]; then
+ echo "${default_version}"
+ return 0
+ fi
+ juicefs_jar=$(basename "${juicefs_jar}")
+ juicefs_jar=${juicefs_jar#juicefs-hadoop-}
+ echo "${juicefs_jar%.jar}"
+}
+
+juicefs_hadoop_jar_download_url() {
+ local juicefs_version="$1"
+ local jar_name="juicefs-hadoop-${juicefs_version}.jar"
+ echo "${JUICEFS_HADOOP_MAVEN_REPO}/${juicefs_version}/${jar_name}"
+}
+
+juicefs_hadoop_jar_download_urls() {
+ local juicefs_version="$1"
+ local jar_name="juicefs-hadoop-${juicefs_version}.jar"
+ printf '%s\n' \
+ "$(juicefs_repository_file_url "${jar_name}")" \
+ "$(juicefs_hadoop_jar_download_url "${juicefs_version}")"
+}
+
+juicefs_cli_archive_name() {
+ local juicefs_version="$1"
+ echo "juicefs-${juicefs_version}-linux-amd64.tar.gz"
+}
+
+juicefs_cli_archive_mirror_url() {
+ local juicefs_version="$1"
+ local archive_name
+ archive_name=$(juicefs_cli_archive_name "${juicefs_version}")
+ juicefs_repository_file_url "${archive_name}"
+}
+
+juicefs_cli_archive_download_url() {
+ local juicefs_version="$1"
+ local archive_name
+ archive_name=$(juicefs_cli_archive_name "${juicefs_version}")
+ echo "${JUICEFS_CLI_RELEASE_REPO}/v${juicefs_version}/${archive_name}"
+}
+
+juicefs_cli_archive_download_urls() {
+ local juicefs_version="$1"
+ printf '%s\n' \
+ "$(juicefs_cli_archive_mirror_url "${juicefs_version}")" \
+ "$(juicefs_cli_archive_download_url "${juicefs_version}")"
+}
+
+juicefs_download_file() {
+ local target_path="$1"
+ local download_label="$2"
+ shift 2
+
+ local download_url=""
+ mkdir -p "$(dirname "${target_path}")"
+ for download_url in "$@"; do
+ [[ -n "${download_url}" ]] || continue
+ echo "Downloading ${download_label} from ${download_url}" >&2
+ if command -v curl >/dev/null 2>&1; then
+ if curl -fL --retry 3 --retry-delay 2 --connect-timeout 10 -o
"${target_path}" "${download_url}"; then
+ return 0
+ fi
+ elif command -v wget >/dev/null 2>&1; then
+ if wget -q "${download_url}" -O "${target_path}"; then
+ return 0
+ fi
+ fi
+ done
+
+ rm -f "${target_path}"
+ return 1
+}
+
+juicefs_download_hadoop_jar_to_cache() {
+ local juicefs_version="$1"
+ local cache_dir="$2"
+ local jar_name="juicefs-hadoop-${juicefs_version}.jar"
+ local target_jar="${cache_dir}/${jar_name}"
+ local -a download_urls=()
+
+ mkdir -p "${cache_dir}"
+ if [[ -s "${target_jar}" ]]; then
+ echo "${target_jar}"
+ return 0
+ fi
+
+ mapfile -t download_urls < <(juicefs_hadoop_jar_download_urls
"${juicefs_version}")
+ if juicefs_download_file "${target_jar}" "JuiceFS Hadoop jar
${juicefs_version}" "${download_urls[@]}"; then
+ echo "${target_jar}"
+ return 0
+ fi
+
+ return 1
+}
+
+juicefs_init_runtime_vars() {
+ DORIS_ROOT="${DORIS_ROOT:-$(cd "${ROOT}/../.." &>/dev/null && pwd)}"
+ JUICEFS_RUNTIME_ROOT="${JUICEFS_RUNTIME_ROOT:-${ROOT}/juicefs}"
+
JUICEFS_LOCAL_BIN="${JUICEFS_LOCAL_BIN:-${JUICEFS_RUNTIME_ROOT}/bin/juicefs}"
+}
+
+juicefs_find_runtime_hadoop_jar() {
+ local -a jar_globs=(
+ "${JUICEFS_RUNTIME_ROOT}/lib/juicefs-hadoop-[0-9]*.jar"
+ "${ROOT}/docker-compose/hive/scripts/auxlib/juicefs-hadoop-[0-9]*.jar"
+
"${DORIS_ROOT}/thirdparty/installed/juicefs_libs/juicefs-hadoop-[0-9]*.jar"
+ "${DORIS_ROOT}/output/fe/lib/juicefs/juicefs-hadoop-[0-9]*.jar"
+
"${DORIS_ROOT}/output/be/lib/java_extensions/juicefs/juicefs-hadoop-[0-9]*.jar"
+ )
+ juicefs_find_hadoop_jar_by_globs "${jar_globs[@]}"
+}
+
+juicefs_resolve_cli() {
+ local juicefs_version
+ local archive_name
+ local tmp_dir
+ local extracted_bin
+ local cache_dir
+ local -a download_urls=()
+
+ if command -v juicefs >/dev/null 2>&1; then
+ command -v juicefs
+ return 0
+ fi
+
+ if [[ -x "${JUICEFS_LOCAL_BIN}" ]]; then
+ echo "${JUICEFS_LOCAL_BIN}"
+ return 0
+ fi
+
+ juicefs_version=$(juicefs_detect_hadoop_version
"$(juicefs_find_runtime_hadoop_jar || true)" "${JUICEFS_DEFAULT_VERSION}")
+ archive_name=$(juicefs_cli_archive_name "${juicefs_version}")
+ cache_dir="${JUICEFS_RUNTIME_ROOT}/bin"
+ mkdir -p "${cache_dir}"
+ tmp_dir=$(mktemp -d "${cache_dir}/tmp.XXXXXX")
+ mapfile -t download_urls < <(juicefs_cli_archive_download_urls
"${juicefs_version}")
+ if ! juicefs_download_file "${tmp_dir}/${archive_name}" "JuiceFS CLI
${juicefs_version}" "${download_urls[@]}"; then
+ rm -rf "${tmp_dir}"
+ return 1
+ fi
+ tar -xzf "${tmp_dir}/${archive_name}" -C "${tmp_dir}"
+ extracted_bin=$(find "${tmp_dir}" -maxdepth 2 -type f -name juicefs | head
-n 1)
+ if [[ -z "${extracted_bin}" ]]; then
+ rm -rf "${tmp_dir}"
+ return 1
+ fi
+ install -m 0755 "${extracted_bin}" "${JUICEFS_LOCAL_BIN}"
+ rm -rf "${tmp_dir}"
+ echo "${JUICEFS_LOCAL_BIN}"
+}
+
+juicefs_ensure_meta_database() {
+ local jfs_meta="$1"
+ local meta_db
+ local mysql_container
+
+ meta_db="${jfs_meta##*/}"
+ meta_db="${meta_db%%\?*}"
+ if [[ ! "${jfs_meta}" == mysql://* ]] || [[ ! "${meta_db}" =~
^[A-Za-z0-9_]+$ ]]; then
+ return 0
+ fi
+
+ mysql_container="${CONTAINER_UID}mysql_57"
+ if ! sudo docker ps --format '{{.Names}}' | grep -qx "${mysql_container}";
then
+ return 0
+ fi
+
+ sudo docker exec "${mysql_container}" mysql -uroot -p123456 -e \
+ "CREATE DATABASE IF NOT EXISTS \`${meta_db}\` CHARACTER SET utf8mb4
COLLATE utf8mb4_bin;" >/dev/null
+}
+
+juicefs_run_cli() {
+ local juicefs_cli
+ juicefs_cli=$(juicefs_resolve_cli)
+ "${juicefs_cli}" "$@"
+}
+
+juicefs_ensure_hadoop_jar_for_hive() {
+ local auxlib_dir="${ROOT}/docker-compose/hive/scripts/auxlib"
+ local source_jar
+ local target_jar
+ local source_realpath
+ local target_realpath
+ local juicefs_version
+
+ source_jar=$(juicefs_find_runtime_hadoop_jar || true)
+ if [[ -z "${source_jar}" ]]; then
+ juicefs_version=$(juicefs_detect_hadoop_version ""
"${JUICEFS_DEFAULT_VERSION}")
+ source_jar=$(juicefs_download_hadoop_jar_to_cache "${juicefs_version}"
"${JUICEFS_RUNTIME_ROOT}/lib" || true)
+ fi
+ if [[ -z "${source_jar}" ]]; then
+ echo "WARN: skip syncing juicefs-hadoop jar for hive, not found and
download failed."
+ return 0
+ fi
+
+ mkdir -p "${auxlib_dir}"
+ target_jar="${auxlib_dir}/$(basename "${source_jar}")"
+ source_realpath=$(realpath "${source_jar}")
+ if [[ -e "${target_jar}" ]]; then
+ target_realpath=$(realpath "${target_jar}")
+ if [[ "${source_realpath}" == "${target_realpath}" ]]; then
+ echo "JuiceFS Hadoop jar already present in hive auxlib:
$(basename "${source_jar}")"
+ return 0
+ fi
+ fi
+ cp -f "${source_jar}" "${target_jar}"
+ echo "Synced JuiceFS Hadoop jar to hive auxlib: $(basename
"${source_jar}")"
+}
+
+juicefs_prepare_meta_for_hive() {
+ local jfs_meta="$1"
+ local volume_name="$2"
+ local bucket_dir="/tmp/jfs-bucket/${volume_name}"
+
+ if [[ -z "${jfs_meta}" ]]; then
+ return 0
+ fi
+ if ! juicefs_resolve_cli >/dev/null 2>&1; then
+ echo "WARN: JuiceFS-dependent tests will fail. Ensure juicefs binary
is on PATH or mirror/github access is available." >&2
+ return 0
+ fi
+
+ mkdir -p "${bucket_dir}"
+ juicefs_ensure_meta_database "${jfs_meta}"
+ if juicefs_run_cli status "${jfs_meta}" >/dev/null 2>&1; then
+ return 0
+ fi
+ rm -rf "${bucket_dir:?}/"*
+ if ! juicefs_run_cli format --storage file --bucket "${bucket_dir}"
"${jfs_meta}" "${volume_name}"; then
+ juicefs_run_cli status "${jfs_meta}" >/dev/null 2>&1 || true
+ fi
+}
diff --git a/docker/thirdparties/run-thirdparties-docker.sh
b/docker/thirdparties/run-thirdparties-docker.sh
index 8731880a6bc..f1ebbcde5d3 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -25,6 +25,7 @@ set -eo pipefail
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
. "${ROOT}/custom_settings.env"
+. "${ROOT}/juicefs-helpers.sh"
usage() {
echo "
@@ -589,6 +590,7 @@ start_iceberg_rest() {
echo "starting dockers in parallel"
reserve_ports
+juicefs_init_runtime_vars
# Ensure hive data is downloaded before starting hive2/hive3, but only once
need_prepare_hive_data=0
@@ -603,6 +605,12 @@ if [[ $need_prepare_hive_data -eq 1 ]]; then
bash "${ROOT}/docker-compose/hive/scripts/prepare-hive-data.sh"
fi
+if [[ "${STOP}" -ne 1 ]]; then
+ if [[ "${RUN_HIVE2}" -eq 1 ]] || [[ "${RUN_HIVE3}" -eq 1 ]]; then
+ juicefs_ensure_hadoop_jar_for_hive
+ fi
+fi
+
declare -A pids
if [[ "${RUN_ES}" -eq 1 ]]; then
@@ -723,6 +731,17 @@ for compose in "${!pids[@]}"; do
fi
done
+if [[ "${STOP}" -ne 1 ]]; then
+ if [[ "${RUN_HIVE2}" -eq 1 ]]; then
+ . "${ROOT}/docker-compose/hive/hive-2x_settings.env"
+ juicefs_prepare_meta_for_hive "${JFS_CLUSTER_META}" "cluster"
+ fi
+ if [[ "${RUN_HIVE3}" -eq 1 ]]; then
+ . "${ROOT}/docker-compose/hive/hive-3x_settings.env"
+ juicefs_prepare_meta_for_hive "${JFS_CLUSTER_META}" "cluster"
+ fi
+fi
+
echo "docker started"
sudo docker ps -a --format "{{.ID}} | {{.Image}} | {{.Status}}"
echo "all dockers started successfully"
diff --git a/docker/thirdparties/test/juicefs-helpers-mirror-test.sh
b/docker/thirdparties/test/juicefs-helpers-mirror-test.sh
new file mode 100644
index 00000000000..407b42dac68
--- /dev/null
+++ b/docker/thirdparties/test/juicefs-helpers-mirror-test.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." &>/dev/null && pwd)"
+. "${ROOT}/juicefs-helpers.sh"
+
+fail() {
+ echo "FAIL: $*" >&2
+ exit 1
+}
+
+assert_eq() {
+ local expected="$1"
+ local actual="$2"
+ [[ "${actual}" == "${expected}" ]] || fail "expected '${expected}', got
'${actual}'"
+}
+
+assert_lines() {
+ local expected="$1"
+ shift
+ local actual
+ actual="$("$@")"
+ [[ "${actual}" == "${expected}" ]] || fail "expected
lines:\n${expected}\nactual:\n${actual}"
+}
+
+assert_lines
$'https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/juicefs-hadoop-1.3.1.jar\nhttps://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop/1.3.1/juicefs-hadoop-1.3.1.jar'
\
+ juicefs_hadoop_jar_download_urls 1.3.1
+
+(
+ unset JUICEFS_THIRDPARTY_REPOSITORY_URL
+ unset REPOSITORY_URL
+ unset JUICEFS_DEFAULT_THIRDPARTY_REPOSITORY_URL
+ s3BucketName="doris-regression-bj"
+ s3Endpoint="oss-cn-beijing.aliyuncs.com"
+ . "${ROOT}/juicefs-helpers.sh"
+ assert_eq
"https://doris-regression-bj.oss-cn-beijing.aliyuncs.com/regression/datalake/thirdparty/juicefs"
\
+ "$(juicefs_thirdparty_repository_url)"
+)
+
+REPOSITORY_URL="https://mirror.example.com/thirdparty/" \
+assert_lines
$'https://mirror.example.com/thirdparty/juicefs-hadoop-1.3.1.jar\nhttps://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop/1.3.1/juicefs-hadoop-1.3.1.jar'
\
+ juicefs_hadoop_jar_download_urls 1.3.1
+
+assert_eq
"https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/juicefs-1.3.1-linux-amd64.tar.gz"
\
+ "$(juicefs_cli_archive_mirror_url 1.3.1)"
+
+REPOSITORY_URL="https://mirror.example.com/thirdparty/" \
+assert_lines
$'https://mirror.example.com/thirdparty/juicefs-1.3.1-linux-amd64.tar.gz\nhttps://github.com/juicedata/juicefs/releases/download/v1.3.1/juicefs-1.3.1-linux-amd64.tar.gz'
\
+ juicefs_cli_archive_download_urls 1.3.1
+
+echo "PASS"
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
index c77937cc8c3..2833cf977d3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
@@ -141,12 +141,12 @@ public class BrokerDesc extends StorageDesc implements
Writable {
case S3:
return TFileType.FILE_S3;
case HDFS:
+ case JFS:
return TFileType.FILE_HDFS;
case STREAM:
return TFileType.FILE_STREAM;
case BROKER:
case OFS:
- case JFS:
default:
return TFileType.FILE_BROKER;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index cc7ecf52c4e..f9f641ae190 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -611,8 +611,9 @@ public class OutFileClause {
* - Centralize HDFS URI parsing logic
* - Add validation in FE to reject incomplete or malformed configs
*/
- if (null != brokerDesc.getStorageType() && brokerDesc.getStorageType()
- .equals(StorageBackend.StorageType.HDFS)) {
+ if (null != brokerDesc.getStorageType() && (brokerDesc.getStorageType()
+ .equals(StorageBackend.StorageType.HDFS)
+ ||
brokerDesc.getStorageType().equals(StorageBackend.StorageType.JFS))) {
String defaultFs =
HdfsPropertiesUtils.extractDefaultFsFromPath(filePath);
brokerDesc.getBackendConfigProperties().put(HdfsProperties.HDFS_DEFAULT_FS_NAME,
defaultFs);
}
@@ -770,4 +771,3 @@ public class OutFileClause {
}
}
-
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
index 5db82e3875a..88fa40da58a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
@@ -65,6 +65,9 @@ public abstract class Resource implements Writable,
GsonPostProcessable {
AI;
public static ResourceType fromString(String resourceType) {
+ if ("jfs".equalsIgnoreCase(resourceType) ||
"juicefs".equalsIgnoreCase(resourceType)) {
+ return HDFS;
+ }
for (ResourceType type : ResourceType.values()) {
if (type.name().equalsIgnoreCase(resourceType)) {
return type;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
index 1ef4c5f921d..f2077b45ad7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
@@ -88,7 +88,7 @@ public class HdfsProperties extends HdfsCompatibleProperties {
private static final String DFS_NAME_SERVICES_KEY = "dfs.nameservices";
- private static final Set<String> supportSchema = ImmutableSet.of("hdfs",
"viewfs");
+ private static final Set<String> supportSchema = ImmutableSet.of("hdfs",
"viewfs", "jfs");
/**
* The final HDFS configuration map that determines the effective settings.
@@ -143,7 +143,8 @@ public class HdfsProperties extends
HdfsCompatibleProperties {
}
userOverriddenHdfsConfig = new HashMap<>();
origProps.forEach((key, value) -> {
- if (key.startsWith("hadoop.") || key.startsWith("dfs.") ||
key.startsWith("fs.")) {
+ if (key.startsWith("hadoop.") || key.startsWith("dfs.") ||
key.startsWith("fs.")
+ || key.startsWith("juicefs.")) {
userOverriddenHdfsConfig.put(key, value);
}
});
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
index 0686f977d4d..57b0b21804d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
@@ -58,7 +58,8 @@ public enum SchemaTypeMapper {
//todo Support for this type is planned but not yet implemented.
OFS("ofs", StorageProperties.Type.BROKER, FileSystemType.OFS,
TFileType.FILE_BROKER),
GFS("gfs", StorageProperties.Type.BROKER, FileSystemType.HDFS,
TFileType.FILE_BROKER),
- JFS("jfs", StorageProperties.Type.BROKER, FileSystemType.JFS,
TFileType.FILE_BROKER),
+ // JuiceFS is accessed through Hadoop-compatible APIs in Doris, so it
follows the HDFS path.
+ JFS("jfs", StorageProperties.Type.HDFS, FileSystemType.HDFS,
TFileType.FILE_HDFS),
VIEWFS("viewfs", StorageProperties.Type.HDFS, FileSystemType.HDFS,
TFileType.FILE_HDFS),
FILE("file", StorageProperties.Type.LOCAL, FileSystemType.FILE,
TFileType.FILE_LOCAL),
OSS_HDFS("oss", StorageProperties.Type.OSS_HDFS, FileSystemType.HDFS,
TFileType.FILE_HDFS),
@@ -158,4 +159,3 @@ public enum SchemaTypeMapper {
return SCHEMA_TO_FILE_TYPE_MAP.get(schema.toLowerCase());
}
}
-
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
index 8a73619824c..c2efae6532e 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
@@ -103,16 +103,16 @@ public class LocationPathTest {
}
@Test
- public void testJFSLocationConvert() {
+ public void testJfsLocationConvertAsHdfsCompatible() {
LocationPath locationPath = LocationPath.of("jfs://test.com");
// FE
Assertions.assertTrue(locationPath.getNormalizedLocation().startsWith("jfs://"));
// BE
String loc = locationPath.toStorageLocation().toString();
Assertions.assertTrue(loc.startsWith("jfs://"));
- Assertions.assertEquals(FileSystemType.JFS, locationPath.getFileSystemType());
+ Assertions.assertEquals(FileSystemType.HDFS, locationPath.getFileSystemType());
Assertions.assertEquals("jfs://test.com", locationPath.getFsIdentifier());
- Assertions.assertEquals(TFileType.FILE_BROKER, locationPath.getTFileTypeForBE());
+ Assertions.assertEquals(TFileType.FILE_HDFS, locationPath.getTFileTypeForBE());
}
@Disabled("not support in master")
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java
index b8ba275e9cf..3a6fa609453 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java
@@ -41,6 +41,8 @@ public class HdfsPropertiesTest {
Map<String, String> simpleHdfsProperties = new HashMap<>();
simpleHdfsProperties.put("uri", "hdfs://test/1.orc");
Assertions.assertEquals(HdfsProperties.class, StorageProperties.createPrimary(simpleHdfsProperties).getClass());
+ simpleHdfsProperties.put("uri", "jfs://test/1.orc");
+ Assertions.assertEquals(HdfsProperties.class, StorageProperties.createPrimary(simpleHdfsProperties).getClass());
Map<String, String> origProps = createBaseHdfsProperties();
List<StorageProperties> storageProperties = StorageProperties.createAll(origProps);
HdfsProperties hdfsProperties = (HdfsProperties) storageProperties.get(0);
@@ -189,4 +191,22 @@ public class HdfsPropertiesTest {
Assertions.assertEquals("hdfs://localhost:9000/test",
hdfsProperties.validateAndNormalizeUri("hdfs://localhost:9000/test"));
}
+
+ @Test
+ public void testJfsBackendProperties() throws UserException {
+ Map<String, String> origProps = createBaseHdfsProperties();
+ origProps.put("fs.defaultFS", "jfs://cluster");
+ origProps.put("uri", "jfs://cluster/user/test/file.parquet");
+ origProps.put("fs.jfs.impl", "io.juicefs.JuiceFileSystem");
+ origProps.put("juicefs.cluster.meta", "redis://127.0.0.1:6379/1");
+
+ HdfsProperties hdfsProperties = (HdfsProperties) StorageProperties.createAll(origProps).get(0);
+ Map<String, String> beProperties = hdfsProperties.getBackendConfigProperties();
+ Assertions.assertEquals("jfs://cluster", beProperties.get("fs.defaultFS"));
+ Assertions.assertEquals("io.juicefs.JuiceFileSystem", beProperties.get("fs.jfs.impl"));
+ Assertions.assertEquals("redis://127.0.0.1:6379/1", beProperties.get("juicefs.cluster.meta"));
+ Assertions.assertEquals("jfs://cluster/user/test/file.parquet", hdfsProperties.validateAndGetUri(origProps));
+ Assertions.assertEquals("jfs://cluster/user/test/file.parquet",
+ hdfsProperties.validateAndNormalizeUri("jfs://cluster/user/test/file.parquet"));
+ }
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java
index eed7360206b..df0a614eaf3 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java
@@ -30,7 +30,7 @@ import java.util.Set;
public class HdfsPropertiesUtilsTest {
- private static final Set<String> supportSchema = ImmutableSet.of("hdfs", "viewfs");
+ private static final Set<String> supportSchema = ImmutableSet.of("hdfs", "viewfs", "jfs");
@Test
public void testCheckLoadPropsAndReturnUri_success() throws Exception {
@@ -107,6 +107,15 @@ public class HdfsPropertiesUtilsTest {
Assertions.assertEquals("viewfs://cluster", result);
}
+ @Test
+ public void testConstructDefaultFsFromUri_jfs() {
+ Map<String, String> props = new HashMap<>();
+ props.put("uri", "jfs://cluster/path");
+
+ String result = HdfsPropertiesUtils.extractDefaultFsFromUri(props, supportSchema);
+ Assertions.assertEquals("jfs://cluster", result);
+ }
+
@Test
public void testConstructDefaultFsFromUri_invalidSchema() {
Map<String, String> props = new HashMap<>();
@@ -146,6 +155,13 @@ public class HdfsPropertiesUtilsTest {
Assertions.assertEquals("HDFS://localhost:9000/test", result);
}
+ @Test
+ public void testConvertUrlToFilePath_jfs() throws Exception {
+ String uri = "jfs://cluster/user/test";
+ String result = HdfsPropertiesUtils.convertUrlToFilePath(uri, "", supportSchema);
+ Assertions.assertEquals("jfs://cluster/user/test", result);
+ }
+
@Test
public void testValidHaConfig() {
Map<String, String> config = new HashMap<>();
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java
index 954d9e90745..b385830afee 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java
@@ -68,6 +68,11 @@ public class CreateResourceCommandTest extends TestWithFeService {
info = new CreateResourceInfo(false, false, "test", properties);
CreateResourceCommand createResourceCommand3 = new CreateResourceCommand(info);
Assertions.assertThrows(AnalysisException.class, () -> createResourceCommand3.getInfo().validate());
+
+ properties = ImmutableMap.of("type", "jfs", "fs.defaultFS", "jfs://cluster");
+ info = new CreateResourceInfo(true, false, "test_jfs", properties);
+ CreateResourceCommand createResourceCommand4 = new CreateResourceCommand(info);
+ Assertions.assertDoesNotThrow(() -> createResourceCommand4.getInfo().validate());
}
@Test
diff --git a/regression-test/suites/external_table_p0/refactor_storage_param/test_jfs_hms_catalog_read.groovy b/regression-test/suites/external_table_p0/refactor_storage_param/test_jfs_hms_catalog_read.groovy
new file mode 100644
index 00000000000..c87e0e82697
--- /dev/null
+++ b/regression-test/suites/external_table_p0/refactor_storage_param/test_jfs_hms_catalog_read.groovy
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_jfs_hms_catalog_read", "p0,external") {
+ String enableJfs = context.config.otherConfigs.get("enableJfsTest")
+ if (enableJfs == null || !enableJfs.equalsIgnoreCase("true")) {
+ logger.info("disable JFS test.")
+ return
+ }
+
+ String enableHive = context.config.otherConfigs.get("enableHiveTest")
+ if (enableHive == null || !enableHive.equalsIgnoreCase("true")) {
+ logger.info("disable Hive test.")
+ return
+ }
+
+ String jfsFs = context.config.otherConfigs.get("jfsFs")
+ if (jfsFs == null || jfsFs.trim().isEmpty()) {
+ logger.info("skip JFS test because jfsFs is empty.")
+ return
+ }
+
+ String jfsImpl = context.config.otherConfigs.get("jfsImpl")
+ if (jfsImpl == null || jfsImpl.trim().isEmpty()) {
+ jfsImpl = "io.juicefs.JuiceFileSystem"
+ }
+ String jfsMeta = context.config.otherConfigs.get("jfsMeta")
+ if (jfsMeta == null || jfsMeta.trim().isEmpty()) {
+ throw new IllegalStateException("jfsMeta must be configured for JFS data IO regression")
+ }
+ String jfsCluster = jfsFs.replaceFirst("^jfs://", "")
+ int slashPos = jfsCluster.indexOf("/")
+ if (slashPos > 0) {
+ jfsCluster = jfsCluster.substring(0, slashPos)
+ }
+ String jfsMetaProperty = ",\n 'juicefs.${jfsCluster}.meta' = '${jfsMeta}'"
+
+ String hdfsUser = context.config.otherConfigs.get("jfsHadoopUser")
+ if (hdfsUser == null || hdfsUser.trim().isEmpty()) {
+ hdfsUser = context.config.otherConfigs.get("hdfsUser")
+ }
+ if (hdfsUser == null || hdfsUser.trim().isEmpty()) {
+ hdfsUser = "root"
+ }
+
+ String hmsUris = context.config.otherConfigs.get("jfsHiveMetastoreUris")
+ if (hmsUris == null || hmsUris.trim().isEmpty()) {
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String hmsPort = context.config.otherConfigs.get("hive3HmsPort")
+ if (hmsPort == null || hmsPort.trim().isEmpty()) {
+ hmsPort = context.config.otherConfigs.get("hive2HmsPort")
+ }
+ if (externalEnvIp == null || externalEnvIp.trim().isEmpty()
+ || hmsPort == null || hmsPort.trim().isEmpty()) {
+ logger.info("skip JFS test because jfsHiveMetastoreUris is empty and fallback externalEnvIp/hmsPort is invalid.")
+ return
+ }
+ hmsUris = "thrift://${externalEnvIp}:${hmsPort}"
+ }
+ String catalogName = "test_jfs_hms_catalog_read"
+ String dbName = "test_jfs_hms_catalog_read_db"
+ String tableName = "test_jfs_hms_catalog_read_tbl"
+ String jfsDbBasePath = context.config.otherConfigs.get("jfsDbBasePath")
+ if (jfsDbBasePath == null || jfsDbBasePath.trim().isEmpty()) {
+ jfsDbBasePath = "${jfsFs}/doris_jfs/${hdfsUser}"
+ }
+ jfsDbBasePath = jfsDbBasePath.replaceAll('/+$', '')
+ String jfsStagingDir = context.config.otherConfigs.get("jfsStagingDir")
+ if (jfsStagingDir == null || jfsStagingDir.trim().isEmpty()) {
+ jfsStagingDir = "${jfsDbBasePath}/.doris_staging"
+ }
+ jfsStagingDir = jfsStagingDir.replaceAll('/+$', '')
+ String dbLocation = "${jfsDbBasePath}/${dbName}"
+
+ sql """drop catalog if exists ${catalogName}"""
+
+ try {
+ sql """
+ CREATE CATALOG ${catalogName} PROPERTIES (
+ 'type' = 'hms',
+ 'hive.metastore.uris' = '${hmsUris}',
+ 'fs.defaultFS' = '${jfsFs}',
+ 'fs.jfs.impl' = '${jfsImpl}',
+ 'hadoop.username' = '${hdfsUser}',
+ 'hive.staging_dir' = '${jfsStagingDir}'
+ ${jfsMetaProperty}
+ );
+ """
+
+ sql """switch ${catalogName}"""
+ def dbs = sql """show databases"""
+ assertTrue(dbs.size() > 0)
+
+ def hasDb = sql """show databases like '${dbName}'"""
+ if (hasDb.size() > 0) {
+ sql """drop table if exists `${dbName}`.`${tableName}`"""
+ sql """drop database if exists `${dbName}`"""
+ }
+ sql """
+ create database `${dbName}`
+ properties('location'='${dbLocation}')
+ """
+ sql """use `${dbName}`"""
+ sql """
+ CREATE TABLE `${tableName}` (
+ `id` INT,
+ `name` STRING
+ ) ENGINE=hive
+ PROPERTIES (
+ 'file_format'='parquet'
+ )
+ """
+ sql """insert into `${tableName}` values (1, 'jfs_1'), (2, 'jfs_2')"""
+
+ def cnt = sql """select count(*) from `${tableName}`"""
+ assertEquals("2", cnt[0][0].toString())
+
+ def rows = sql """select * from `${tableName}` order by id"""
+ assertTrue(rows.size() == 2)
+ assertEquals("1", rows[0][0].toString())
+ assertEquals("jfs_1", rows[0][1].toString())
+ assertEquals("2", rows[1][0].toString())
+ assertEquals("jfs_2", rows[1][1].toString())
+ } finally {
+ sql """switch internal"""
+ }
+}
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index ef0b5897402..abd7c62eaa7 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1953,6 +1953,15 @@ build_jindofs() {
cp -r ${TP_SOURCE_DIR}/${JINDOFS_SOURCE}/* "${TP_INSTALL_DIR}/jindofs_libs/"
}
+# juicefs
+build_juicefs() {
+ check_if_archive_exist "${JUICEFS_NAME}"
+
+ rm -rf "${TP_INSTALL_DIR}/juicefs_libs/"
+ mkdir -p "${TP_INSTALL_DIR}/juicefs_libs/"
+ cp -r "${TP_SOURCE_DIR}/${JUICEFS_NAME}" "${TP_INSTALL_DIR}/juicefs_libs/"
+}
+
# pugixml
build_pugixml() {
check_if_source_exist "${PUGIXML_SOURCE}"
@@ -1974,6 +1983,7 @@ build_pugixml() {
if [[ "${#packages[@]}" -eq 0 ]]; then
packages=(
jindofs
+ juicefs
odbc
openssl
libevent
@@ -2139,6 +2149,7 @@ cleanup_package_source() {
dragonbox) src_var="DRAGONBOX_SOURCE" ;;
icu) src_var="ICU_SOURCE" ;;
jindofs) src_var="JINDOFS_SOURCE" ;;
+ juicefs) src_var="JUICEFS_SOURCE" ;;
pugixml) src_var="PUGIXML_SOURCE" ;;
aws_sdk) src_var="AWS_SDK_SOURCE" ;;
lzma) src_var="LZMA_SOURCE" ;;
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index 68291cdf21c..e639f511575 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -103,6 +103,27 @@ md5sum_func() {
return 0
}
+juicefs_default_repository_url() {
+ if [[ -n "${s3BucketName:-}" && -n "${s3Endpoint:-}" ]]; then
+ echo "https://${s3BucketName}.${s3Endpoint}/regression/datalake/thirdparty/juicefs"
+ return 0
+ fi
+ echo "https://doris-thirdparty-repo.bj.bcebos.com/thirdparty"
+}
+
+juicefs_repository_url() {
+ local repository_url="${JUICEFS_THIRDPARTY_REPOSITORY_URL:-${REPOSITORY_URL:-}}"
+ if [[ -z "${repository_url}" ]]; then
+ repository_url="$(juicefs_default_repository_url)"
+ fi
+ echo "${repository_url%/}"
+}
+
+juicefs_repository_file_url() {
+ local filename="$1"
+ echo "$(juicefs_repository_url)/${filename}"
+}
+
# return 0 if download succeed.
# return 1 if not.
download_func() {
@@ -161,6 +182,15 @@ echo "===== Downloading thirdparty archives..."
for TP_ARCH in "${TP_ARCHIVES[@]}"; do
NAME="${TP_ARCH}_NAME"
MD5SUM="${TP_ARCH}_MD5SUM"
+ if [[ "${TP_ARCH}" == "JUICEFS" ]]; then
+ MIRROR_URL="$(juicefs_repository_file_url "${!NAME}")"
+ if ! download_func "${!NAME}" "${MIRROR_URL}" "${TP_SOURCE_DIR}" "${!MD5SUM}" \
+ && ! download_func "${!NAME}" "${JUICEFS_DOWNLOAD}" "${TP_SOURCE_DIR}" "${!MD5SUM}"; then
+ echo "Failed to download ${!NAME}"
+ exit 1
+ fi
+ continue
+ fi
if [[ -z "${REPOSITORY_URL}" ]]; then
URL="${TP_ARCH}_DOWNLOAD"
if ! download_func "${!NAME}" "${!URL}" "${TP_SOURCE_DIR}" "${!MD5SUM}"; then
diff --git a/thirdparty/test/download-thirdparty-fallback-test.sh b/thirdparty/test/download-thirdparty-fallback-test.sh
new file mode 100644
index 00000000000..32a7cac5aac
--- /dev/null
+++ b/thirdparty/test/download-thirdparty-fallback-test.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." &>/dev/null && pwd)"
+
+fail() {
+ echo "FAIL: $*" >&2
+ exit 1
+}
+
+assert_eq() {
+ local expected="$1"
+ local actual="$2"
+ [[ "${actual}" == "${expected}" ]] || fail "expected '${expected}', got '${actual}'"
+}
+
+tmpdir="$(mktemp -d)"
+trap 'rm -rf "${tmpdir}"' EXIT
+
+stub_bin="${tmpdir}/bin"
+mkdir -p "${stub_bin}"
+download_log="${tmpdir}/wget.log"
+payload="juicefs jar payload"
+payload_md5="$(printf '%s' "${payload}" | md5sum | awk '{print $1}')"
+
+cat > "${tmpdir}/vars.sh" <<EOF
+#!/bin/bash
+export TP_SOURCE_DIR="${tmpdir}/src"
+export TP_INSTALL_DIR="${tmpdir}/installed"
+export TP_PATCH_DIR="${tmpdir}/patches"
+export TP_INCLUDE_DIR="\${TP_INSTALL_DIR}/include"
+export TP_LIB_DIR="\${TP_INSTALL_DIR}/lib"
+export TP_JAR_DIR="\${TP_INSTALL_DIR}/lib/jar"
+JUICEFS_DOWNLOAD="https://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop/1.3.1/juicefs-hadoop-1.3.1.jar"
+JUICEFS_NAME="juicefs-hadoop-1.3.1.jar"
+JUICEFS_SOURCE=
+JUICEFS_MD5SUM="${payload_md5}"
+export TP_ARCHIVES=('JUICEFS')
+EOF
+
+cat > "${stub_bin}/wget" <<EOF
+#!/usr/bin/env bash
+set -eo pipefail
+url=""
+output=""
+expect_output=0
+for arg in "\$@"; do
+ if [[ "\${expect_output}" -eq 1 ]]; then
+ output="\${arg}"
+ expect_output=0
+ continue
+ fi
+ if [[ "\${arg}" == "-O" ]]; then
+ expect_output=1
+ continue
+ fi
+ if [[ "\${arg}" != -* && -z "\${url}" ]]; then
+ url="\${arg}"
+ fi
+done
+echo "\${url}" >> "${download_log}"
+if [[ "\${url}" == "https://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop/1.3.1/juicefs-hadoop-1.3.1.jar" ]]; then
+ printf '%s' "${payload}" > "\${output}"
+ exit 0
+fi
+exit 1
+EOF
+chmod +x "${stub_bin}/wget"
+
+if ! PATH="${stub_bin}:${PATH}" s3BucketName="test-bucket" s3Endpoint="oss.example.com" \
+ TP_DIR="${tmpdir}" DORIS_HOME="${ROOT}/.." \
+ bash "${ROOT}/download-thirdparty.sh"; then
+ fail "expected download-thirdparty.sh to fall back from S3 mirror to Maven for JUICEFS"
+fi
+
+[[ -f "${tmpdir}/src/juicefs-hadoop-1.3.1.jar" ]] || fail "expected downloaded archive"
+assert_eq "${payload}" "$(cat "${tmpdir}/src/juicefs-hadoop-1.3.1.jar")"
+
+expected_log=$'https://test-bucket.oss.example.com/regression/datalake/thirdparty/juicefs/juicefs-hadoop-1.3.1.jar\nhttps://test-bucket.oss.example.com/regression/datalake/thirdparty/juicefs/juicefs-hadoop-1.3.1.jar\nhttps://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop/1.3.1/juicefs-hadoop-1.3.1.jar'
+assert_eq "${expected_log}" "$(cat "${download_log}")"
+
+echo "PASS"
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 6bb4551299b..ac36a438055 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -546,6 +546,12 @@ JINDOFS_NAME=jindofs-6.8.2-libs-0.1.tar.gz
JINDOFS_SOURCE=jindofs-6.8.2-libs-0.1
JINDOFS_MD5SUM="0e5b0f71e636b8ed3f09e0bf16208fd1"
+# juicefs
+JUICEFS_DOWNLOAD="https://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop/1.3.1/juicefs-hadoop-1.3.1.jar"
+JUICEFS_NAME=juicefs-hadoop-1.3.1.jar
+JUICEFS_SOURCE=
+JUICEFS_MD5SUM="f374dfbfbdc4b83417cfea78a6728c54"
+
# pugixml
PUGIXML_DOWNLOAD="https://github.com/zeux/pugixml/releases/download/v1.15/pugixml-1.15.tar.gz"
PUGIXML_NAME=pugixml-1.15.tar.gz
@@ -633,6 +639,7 @@ export TP_ARCHIVES=(
'DRAGONBOX'
'ICU'
'JINDOFS'
+ 'JUICEFS'
'PUGIXML'
)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]