This is an automated email from the ASF dual-hosted git repository.
kirs pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 2235c1abd3a [Feat](OSS)Decouple COS and OBS dependencies with optional
inclusion mechanism (#39831)
2235c1abd3a is described below
commit 2235c1abd3a67ea3ae92fdd0986e101b4f7be08d
Author: Calvin Kirs <[email protected]>
AuthorDate: Mon Sep 9 16:50:12 2024 +0800
[Feat](OSS)Decouple COS and OBS dependencies with optional inclusion
mechanism (#39831)
…
improve the flexibility of the project by decoupling direct dependencies
on the hadoop-cos and hadoop-huaweicloud libraries. These changes allow
users to control whether COS and OBS dependencies are included in the
final build, enabling a more customizable setup.
## Proposed changes
Issue Number: close #xxx
<!--Describe your changes.-->
---
fe/fe-core/pom.xml | 3 ++
.../datasource/property/PropertyConverter.java | 23 +++++++------
.../property/constants/CosProperties.java | 38 +++++++++++++++++++++-
.../property/constants/ObsProperties.java | 37 +++++++++++++++++++++
fe/pom.xml | 3 ++
5 files changed, 91 insertions(+), 13 deletions(-)
diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml
index 7f3915ce120..1c5ee1990e1 100644
--- a/fe/fe-core/pom.xml
+++ b/fe/fe-core/pom.xml
@@ -410,6 +410,7 @@ under the License.
<groupId>com.huaweicloud</groupId>
<artifactId>hadoop-huaweicloud</artifactId>
<version>${huaweiobs.version}</version>
+ <scope>${obs.dependency.scope}</scope>
<exclusions>
<exclusion>
<artifactId>jackson-databind</artifactId>
@@ -425,6 +426,7 @@ under the License.
<groupId>com.qcloud.cos</groupId>
<artifactId>hadoop-cos</artifactId>
<version>${tencentcos.version}</version>
+ <scope>${cos.dependency.scope}</scope>
<exclusions>
<exclusion>
<groupId>org.json</groupId>
@@ -666,6 +668,7 @@ under the License.
<artifactId>gcs-connector</artifactId>
<version>hadoop2-2.2.8</version>
<classifier>shaded</classifier>
+ <scope>${gcs.dependency.scope}</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
index a0ea2d3b69e..4c3591df6fe 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
@@ -38,11 +38,7 @@ import com.aliyun.datalake.metastore.common.DataLakeConfig;
import com.amazonaws.glue.catalog.util.AWSGlueConfig;
import com.google.common.base.Strings;
import com.google.common.collect.Maps;
-import org.apache.hadoop.fs.CosFileSystem;
-import org.apache.hadoop.fs.CosNConfigKeys;
import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem;
-import org.apache.hadoop.fs.obs.OBSConstants;
-import org.apache.hadoop.fs.obs.OBSFileSystem;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider;
@@ -164,12 +160,12 @@ public class PropertyConverter {
private static Map<String, String> convertToOBSProperties(Map<String,
String> props,
CloudCredential
credential) {
Map<String, String> obsProperties = Maps.newHashMap();
- obsProperties.put(OBSConstants.ENDPOINT,
props.get(ObsProperties.ENDPOINT));
+ obsProperties.put(ObsProperties.HadoopFsObsConstants.ENDPOINT,
props.get(ObsProperties.ENDPOINT));
obsProperties.put(ObsProperties.FS.IMPL_DISABLE_CACHE, "true");
obsProperties.put("fs.obs.impl", getHadoopFSImplByScheme("obs"));
if (credential.isWhole()) {
- obsProperties.put(OBSConstants.ACCESS_KEY,
credential.getAccessKey());
- obsProperties.put(OBSConstants.SECRET_KEY,
credential.getSecretKey());
+ obsProperties.put(ObsProperties.HadoopFsObsConstants.ACCESS_KEY,
credential.getAccessKey());
+ obsProperties.put(ObsProperties.HadoopFsObsConstants.SECRET_KEY,
credential.getSecretKey());
}
if (credential.isTemporary()) {
obsProperties.put(ObsProperties.FS.SESSION_TOKEN,
credential.getSessionToken());
@@ -184,11 +180,11 @@ public class PropertyConverter {
public static String getHadoopFSImplByScheme(String fsScheme) {
if (fsScheme.equalsIgnoreCase("obs")) {
- return OBSFileSystem.class.getName();
+ return ObsProperties.HadoopFsObsConstants.HADOOP_FS_OBS_CLASS_NAME;
} else if (fsScheme.equalsIgnoreCase("oss")) {
return AliyunOSSFileSystem.class.getName();
} else if (fsScheme.equalsIgnoreCase("cosn") ||
fsScheme.equalsIgnoreCase("lakefs")) {
- return CosFileSystem.class.getName();
+ return CosProperties.HadoopFsCosConstants.HADOOP_FS_COS_CLASS_NAME;
} else {
return S3AFileSystem.class.getName();
}
@@ -350,13 +346,16 @@ public class PropertyConverter {
private static Map<String, String> convertToCOSProperties(Map<String,
String> props, CloudCredential credential) {
Map<String, String> cosProperties = Maps.newHashMap();
- cosProperties.put(CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY,
props.get(CosProperties.ENDPOINT));
+
cosProperties.put(CosProperties.HadoopFsCosConstants.COSN_ENDPOINT_SUFFIX_KEY,
+ props.get(CosProperties.ENDPOINT));
cosProperties.put("fs.cosn.impl.disable.cache", "true");
cosProperties.put("fs.cosn.impl", getHadoopFSImplByScheme("cosn"));
cosProperties.put("fs.lakefs.impl", getHadoopFSImplByScheme("lakefs"));
if (credential.isWhole()) {
- cosProperties.put(CosNConfigKeys.COSN_USERINFO_SECRET_ID_KEY,
credential.getAccessKey());
- cosProperties.put(CosNConfigKeys.COSN_USERINFO_SECRET_KEY_KEY,
credential.getSecretKey());
+
cosProperties.put(CosProperties.HadoopFsCosConstants.COSN_USERINFO_SECRET_ID_KEY,
+ credential.getAccessKey());
+
cosProperties.put(CosProperties.HadoopFsCosConstants.COSN_USERINFO_SECRET_KEY_KEY,
+ credential.getSecretKey());
}
// session token is unsupported
for (Map.Entry<String, String> entry : props.entrySet()) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/CosProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/CosProperties.java
index bdf99a76fb7..a18937b6b1b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/CosProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/CosProperties.java
@@ -34,8 +34,44 @@ public class CosProperties extends BaseProperties {
public static final String SESSION_TOKEN = "cos.session_token";
public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT,
ACCESS_KEY, SECRET_KEY);
-
public static CloudCredential getCredential(Map<String, String> props) {
return getCloudCredential(props, ACCESS_KEY, SECRET_KEY,
SESSION_TOKEN);
}
+
+ /**
+ * This class contains constants related to the COS (Tencent Cloud Object
Storage) properties.
+ * <p>
+ * The constants in the `HadoopFsCosConstants` inner class are copied from
+ * `org.apache.hadoop.fs.CosNConfigKeys`. This approach is intentionally
taken to
+ * avoid a compile-time dependency on the `hadoop-cos` library. By doing
so, we
+ * ensure that this project remains decoupled from `hadoop-cos`, allowing
it to be
+ * compiled and built independently.
+ * <p>
+ * We can control whether to include COS-related dependencies by
configuring
+ * a build parameter. By default, the COS-related dependencies are not
included in
+ * the packaging process. If the package does not contain these
dependencies but
+ * the functionality related to Hadoop COS is required, users will need to
manually
+ * copy the relevant dependencies into the `fe/lib` directory.
+ * <p>
+ * However, since this is not an uberjar and the required dependencies are
not bundled
+ * together, manually copying dependencies is not recommended due to
potential
+ * issues such as version conflicts or missing transitive dependencies.
+ * <p>
+ * Users are encouraged to configure the build process to include the
necessary
+ * dependencies when Hadoop COS support is required, ensuring a smoother
+ * and more reliable deployment.
+ * <p>
+ * Additionally, by copying these constants instead of directly depending
on
+ * `hadoop-cos`, there is an additional maintenance overhead. Any changes
in
+ * `CosNConfigKeys` in future versions of `hadoop-cos` will not be
automatically
+ * reflected here. It is important to manually track and update these
constants
+ * as needed to ensure compatibility.
+ */
+ public static class HadoopFsCosConstants {
+ public static final String HADOOP_FS_COS_CLASS_NAME =
"org.apache.hadoop.fs.CosFileSystem";
+
+ public static final String COSN_ENDPOINT_SUFFIX_KEY =
"fs.cosn.bucket.endpoint_suffix";
+ public static final String COSN_USERINFO_SECRET_ID_KEY =
"fs.cosn.userinfo.secretId";
+ public static final String COSN_USERINFO_SECRET_KEY_KEY =
"fs.cosn.userinfo.secretKey";
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/ObsProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/ObsProperties.java
index 19ff097fc32..ce2d29f1c76 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/ObsProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/ObsProperties.java
@@ -43,4 +43,41 @@ public class ObsProperties extends BaseProperties {
public static CloudCredential getCredential(Map<String, String> props) {
return getCloudCredential(props, ACCESS_KEY, SECRET_KEY,
SESSION_TOKEN);
}
+
+ /**
+ * This class contains constants related to the OBS (Huawei Object
Storage Service) properties.
+ * <p>
+ * The constants in the `HadoopFsObsConstants` inner class are copied from
+ * `org.apache.hadoop.fs.obs.OBSConstants`. This approach is deliberately
taken to
+ * avoid a compile-time dependency on the `hadoop-huaweicloud` library. By
doing so, we
+ * ensure that this project remains decoupled from `hadoop-obs`, allowing
it to be
+ * compiled and built independently.
+ * <p>
+ * Similar to the COS properties, we can control whether to include
OBS-related
+ * dependencies by configuring a build parameter. By default, the
OBS-related
+ * dependencies are not included in the packaging process. If the package
does not
+ * contain these dependencies but the functionality related to Hadoop OBS
is required,
+ * users will need to manually copy the relevant dependencies into the
`fe/lib` directory.
+ * <p>
+ * However, manually copying dependencies is not recommended since this is
not an
+ * uberjar, and there could be potential issues such as version conflicts
or missing
+ * transitive dependencies.
+ * <p>
+ * Users are encouraged to configure the build process to include the
necessary
+ * dependencies when Hadoop OBS support is required, ensuring a smoother
+ * and more reliable deployment.
+ * <p>
+ * Additionally, by copying these constants instead of directly depending
on
+ * `hadoop-huaweicloud`, there is an additional maintenance overhead. Any
changes in
+ * `OBSConstants` in future versions of `hadoop-huaweicloud` will not be
automatically
+ * reflected here. It is important to manually track and update these
constants
+ * as needed to ensure compatibility.
+ */
+ public static class HadoopFsObsConstants {
+ public static final String HADOOP_FS_OBS_CLASS_NAME =
"org.apache.hadoop.fs.obs.OBSFileSystem";
+
+ public static final String ENDPOINT = "fs.obs.endpoint";
+ public static final String ACCESS_KEY = "fs.obs.access.key";
+ public static final String SECRET_KEY = "fs.obs.secret.key";
+ }
}
diff --git a/fe/pom.xml b/fe/pom.xml
index c8447d0878f..768f40c3337 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -227,6 +227,9 @@ under the License.
<!--suppress UnresolvedMavenProperty -->
<doris.home>${fe.dir}/../</doris.home>
<revision>1.2-SNAPSHOT</revision>
+ <obs.dependency.scope>compile</obs.dependency.scope>
+ <cos.dependency.scope>compile</cos.dependency.scope>
+ <gcs.dependency.scope>compile</gcs.dependency.scope>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<doris.hive.catalog.shade.version>2.1.1</doris.hive.catalog.shade.version>
<maven.compiler.source>1.8</maven.compiler.source>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]