This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c5dbd53e6f [fix](multi-catalog)support oss-hdfs service (#21504)
c5dbd53e6f is described below
commit c5dbd53e6f3e4db71206e18e4f1b97d4d9d037e4
Author: slothever <[email protected]>
AuthorDate: Thu Jul 13 18:02:15 2023 +0800
[fix](multi-catalog)support oss-hdfs service (#21504)
1. Support oss-hdfs if it is enabled when using a DLF or HMS catalog.
2. Add docs for Aliyun DLF and MaxCompute.
---
docs/en/docs/lakehouse/multi-catalog/dlf.md | 99 +++++++++----------
docs/en/docs/lakehouse/multi-catalog/iceberg.md | 6 +-
.../en/docs/lakehouse/multi-catalog/max_compute.md | 60 ++++++++++++
docs/sidebars.json | 1 +
docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md | 105 +++++++++------------
docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md | 6 +-
.../docs/lakehouse/multi-catalog/max_compute.md | 60 ++++++++++++
.../java/org/apache/doris/common/util/S3Util.java | 14 ++-
.../datasource/property/PropertyConverter.java | 51 +++++++++-
.../property/constants/OssProperties.java | 1 +
.../doris/planner/external/FileQueryScanNode.java | 4 +
11 files changed, 288 insertions(+), 119 deletions(-)
diff --git a/docs/en/docs/lakehouse/multi-catalog/dlf.md
b/docs/en/docs/lakehouse/multi-catalog/dlf.md
index 42c302e5c7..763fa9fdd8 100644
--- a/docs/en/docs/lakehouse/multi-catalog/dlf.md
+++ b/docs/en/docs/lakehouse/multi-catalog/dlf.md
@@ -35,28 +35,28 @@ Doris can access DLF the same way as it accesses Hive
Metastore.
## Connect to DLF
-### The First Way, Create a Hive Catalog.
+### Create a DLF Catalog
```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
+CREATE CATALOG dlf PROPERTIES (
"type"="hms",
- "dlf.catalog.proxyMode" = "DLF_ONLY",
"hive.metastore.type" = "dlf",
- "dlf.catalog.endpoint" = "dlf.cn-beijing.aliyuncs.com",
- "dlf.catalog.region" = "cn-beijing",
- "dlf.catalog.uid" = "uid",
- "dlf.catalog.accessKeyId" = "ak",
- "dlf.catalog.accessKeySecret" = "sk"
+ "dlf.proxy.mode" = "DLF_ONLY",
+ "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+ "dlf.region" = "cn-beijing",
+ "dlf.uid" = "uid",
+ "dlf.access_key" = "ak",
+ "dlf.secret_key" = "sk"
);
```
-`type` should always be `hms`. If you need to access Alibaba Cloud OSS on the
public network, can add `"dlf.catalog.accessPublic"="true"`.
+`type` should always be `hms`. If you need to access Alibaba Cloud OSS over the public network, you can add `"dlf.access.public"="true"`.
-* `dlf.catalog.endpoint`: DLF Endpoint. See [Regions and Endpoints of
DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
-* `dlf.catalog.region`: DLF Region. See [Regions and Endpoints of
DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
-* `dlf.catalog.uid`: Alibaba Cloud account. You can find the "Account ID" in
the upper right corner on the Alibaba Cloud console.
-* `dlf.catalog.accessKeyId`:AccessKey, which you can create and manage on the
[Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
-* `dlf.catalog.accessKeySecret`:SecretKey, which you can create and manage on
the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `dlf.endpoint`: DLF Endpoint. See [Regions and Endpoints of
DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
+* `dlf.region`: DLF Region. See [Regions and Endpoints of
DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
+* `dlf.uid`: Alibaba Cloud account. You can find the "Account ID" in the upper
right corner on the Alibaba Cloud console.
+* `dlf.access_key`: AccessKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `dlf.secret_key`: SecretKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
Other configuration items are fixed and require no modifications.
@@ -64,54 +64,43 @@ After the above steps, you can access metadata in DLF the
same way as you access
Doris supports accessing Hive/Iceberg/Hudi metadata in DLF.
-### The Second Way, Configure the Hive Conf
+### Use OSS-HDFS as the data source
-1. Create the `hive-site.xml` file, and put it in the `fe/conf` directory.
+1. Enable OSS-HDFS. See [Grant access to OSS or OSS-HDFS](https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-hdfsnew).
+2. Download the SDK: [JindoData SDK](https://github.com/aliyun/alibabacloud-jindodata/blob/master/docs/user/5.x/5.0.0-beta7/jindodata_download.md).
+3. Decompress jindosdk.tar.gz, then copy `jindo-core.jar` and `jindo-sdk.jar` from its lib directory to both `${DORIS_HOME}/fe/lib` and `${DORIS_HOME}/be/lib/java_extensions`.
+4. Create a DLF Catalog and set `oss.hdfs.enabled` to `true`:
-```
-<?xml version="1.0"?>
-<configuration>
- <!--Set to use dlf client-->
- <property>
- <name>hive.metastore.type</name>
- <value>dlf</value>
- </property>
- <property>
- <name>dlf.catalog.endpoint</name>
- <value>dlf-vpc.cn-beijing.aliyuncs.com</value>
- </property>
- <property>
- <name>dlf.catalog.region</name>
- <value>cn-beijing</value>
- </property>
- <property>
- <name>dlf.catalog.proxyMode</name>
- <value>DLF_ONLY</value>
- </property>
- <property>
- <name>dlf.catalog.uid</name>
- <value>20000000000000000</value>
- </property>
- <property>
- <name>dlf.catalog.accessKeyId</name>
- <value>XXXXXXXXXXXXXXX</value>
- </property>
- <property>
- <name>dlf.catalog.accessKeySecret</name>
- <value>XXXXXXXXXXXXXXXXX</value>
- </property>
-</configuration>
+```sql
+CREATE CATALOG dlf_oss_hdfs PROPERTIES (
+ "type"="hms",
+ "hive.metastore.type" = "dlf",
+ "dlf.proxy.mode" = "DLF_ONLY",
+ "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+ "dlf.region" = "cn-beijing",
+ "dlf.uid" = "uid",
+ "dlf.access_key" = "ak",
+ "dlf.secret_key" = "sk",
+ "oss.hdfs.enabled" = "true"
+);
```
-2. Restart FE, Doris will read and parse `fe/conf/hive-site.xml`. And then
Create Catalog via the `CREATE CATALOG` statement.
+### DLF Iceberg Catalog
```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
- "type"="hms",
- "hive.metastore.uris" = "thrift://127.0.0.1:9083"
-)
+CREATE CATALOG dlf_iceberg PROPERTIES (
+ "type"="iceberg",
+ "iceberg.catalog.type" = "dlf",
+ "dlf.proxy.mode" = "DLF_ONLY",
+ "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+ "dlf.region" = "cn-beijing",
+ "dlf.uid" = "uid",
+ "dlf.access_key" = "ak",
+ "dlf.secret_key" = "sk"
+);
```
-`type` should always be `hms`; while `hive.metastore.uris` can be arbitary
since it is not used in real practice, but it should follow the format of Hive
Metastore Thrift URI.
+## Column type mapping
+Same as Hive Catalog; see the **column type mapping** section in [Hive Catalog](./hive.md).
diff --git a/docs/en/docs/lakehouse/multi-catalog/iceberg.md
b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
index 6f063ecf0b..18d66a5350 100644
--- a/docs/en/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
@@ -53,7 +53,7 @@ CREATE CATALOG iceberg PROPERTIES (
### Create Catalog based on Iceberg API
-Use the Iceberg API to access metadata, and support services such as Hive,
REST, and Glue as Iceberg's Catalog.
+Use the Iceberg API to access metadata; services such as Hive, REST, DLF, and Glue are supported as Iceberg catalogs.
#### Hive Metastore
@@ -85,6 +85,10 @@ CREATE CATALOG glue PROPERTIES (
For Iceberg properties, see [Iceberg Glue
Catalog](https://iceberg.apache.org/docs/latest/aws/#glue-catalog)
+#### Alibaba Cloud DLF
+
+See [Alibaba Cloud DLF Catalog](dlf.md).
+
#### REST Catalog
This method needs to provide REST services in advance, and users need to
implement the REST interface for obtaining Iceberg metadata.
diff --git a/docs/en/docs/lakehouse/multi-catalog/max_compute.md
b/docs/en/docs/lakehouse/multi-catalog/max_compute.md
new file mode 100644
index 0000000000..a2f141df70
--- /dev/null
+++ b/docs/en/docs/lakehouse/multi-catalog/max_compute.md
@@ -0,0 +1,60 @@
+---
+{
+ "title": "Alibaba Cloud Max Compute",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+# Alibaba Cloud MaxCompute
+
+MaxCompute (previously known as ODPS) is a data warehousing solution that can
process terabytes or petabytes of data.
+
+> [What is
MaxCompute](https://www.alibabacloud.com/help/en/maxcompute/product-overview/what-is-maxcompute)
+
+## Connect to MaxCompute
+
+```sql
+CREATE CATALOG mc PROPERTIES (
+ "type" = "max_compute",
+ "mc.region" = "cn-beijing",
+ "mc.default.project" = "your-project",
+ "mc.access_key" = "ak",
+ "mc.secret_key" = "sk"
+);
+```
+
+* `mc.region`: MaxCompute region. You can find the region in the endpoint; see [Endpoints](https://www.alibabacloud.com/help/en/maxcompute/user-guide/endpoints).
+* `mc.default.project`: The MaxCompute project. See your [MaxCompute projects](https://maxcompute.console.aliyun.com/cn-beijing/project-list).
+* `mc.access_key`: AccessKey, which you can create and manage on the [Alibaba
Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `mc.secret_key`: SecretKey, which you can create and manage on the [Alibaba
Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `mc.public_access`: Set `"mc.public_access"="true"` to enable public network access; recommended for testing only.
+
+## Quotas
+
+The pay-as-you-go quota has limited query concurrency and usage. To increase resources, see [Manage quotas](https://www.alibabacloud.com/help/en/maxcompute/user-guide/manage-quotas-in-the-new-maxcompute-console).
+
+## Column type mapping
+
+Same as Hive Catalog; see the **column type mapping** section in [Hive Catalog](./hive.md).
+
+
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 3c0920b94e..715681bd43 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -208,6 +208,7 @@
"lakehouse/multi-catalog/hudi",
"lakehouse/multi-catalog/paimon",
"lakehouse/multi-catalog/dlf",
+ "lakehouse/multi-catalog/max_compute",
"lakehouse/multi-catalog/es",
"lakehouse/multi-catalog/jdbc"
]
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md
b/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md
index 02bf1a140b..822ecff1bb 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md
@@ -35,28 +35,28 @@ under the License.
## Connect to DLF
-### Method 1: Create a Hive Catalog to connect to DLF
+### Create a DLF Catalog
```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
+CREATE CATALOG dlf PROPERTIES (
"type"="hms",
- "dlf.catalog.proxyMode" = "DLF_ONLY",
"hive.metastore.type" = "dlf",
- "dlf.catalog.endpoint" = "dlf.cn-beijing.aliyuncs.com",
- "dlf.catalog.region" = "cn-beijing",
- "dlf.catalog.uid" = "uid",
- "dlf.catalog.accessKeyId" = "ak",
- "dlf.catalog.accessKeySecret" = "sk"
+ "dlf.proxy.mode" = "DLF_ONLY",
+ "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+ "dlf.region" = "cn-beijing",
+ "dlf.uid" = "uid",
+ "dlf.access_key" = "ak",
+ "dlf.secret_key" = "sk"
);
```
-`type` is fixed to `hms`. To access Alibaba Cloud OSS data over the public network, you can set `"dlf.catalog.accessPublic"="true"`.
+`type` is fixed to `hms`. To access Alibaba Cloud OSS data over the public network, you can set `"dlf.access.public"="true"`.
-* `dlf.catalog.endpoint`: DLF Endpoint. See the [mapping table of DLF regions and endpoints](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints).
-* `dlf.catalog.region`: DLF Region. See the [mapping table of DLF regions and endpoints](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints).
-* `dlf.catalog.uid`: Alibaba Cloud account, i.e. the "Cloud Account ID" shown in the personal information at the upper right corner of the Alibaba Cloud console.
-* `dlf.catalog.accessKeyId`: AccessKey, which can be created and managed in the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
-* `dlf.catalog.accessKeySecret`: SecretKey, which can be created and managed in the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `dlf.endpoint`: DLF Endpoint. See the [mapping table of DLF regions and endpoints](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints).
+* `dlf.region`: DLF Region. See the [mapping table of DLF regions and endpoints](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints).
+* `dlf.uid`: Alibaba Cloud account, i.e. the "Cloud Account ID" shown in the personal information at the upper right corner of the Alibaba Cloud console.
+* `dlf.access_key`: AccessKey, which can be created and managed in the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `dlf.secret_key`: SecretKey, which can be created and managed in the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
The other configuration items are fixed and require no changes.
@@ -64,55 +64,42 @@ CREATE CATALOG hive_with_dlf PROPERTIES (
As with Hive Catalog, accessing Hive/Iceberg/Hudi metadata in DLF is supported.
-### Method 2: Configure Hive Conf to connect to DLF
-
-1. Create a hive-site.xml file and place it in the `fe/conf` directory.
-
- ```
- <?xml version="1.0"?>
- <configuration>
- <!--Set to use dlf client-->
- <property>
- <name>hive.metastore.type</name>
- <value>dlf</value>
- </property>
- <property>
- <name>dlf.catalog.endpoint</name>
- <value>dlf-vpc.cn-beijing.aliyuncs.com</value>
- </property>
- <property>
- <name>dlf.catalog.region</name>
- <value>cn-beijing</value>
- </property>
- <property>
- <name>dlf.catalog.proxyMode</name>
- <value>DLF_ONLY</value>
- </property>
- <property>
- <name>dlf.catalog.uid</name>
- <value>20000000000000000</value>
- </property>
- <property>
- <name>dlf.catalog.accessKeyId</name>
- <value>XXXXXXXXXXXXXXX</value>
- </property>
- <property>
- <name>dlf.catalog.accessKeySecret</name>
- <value>XXXXXXXXXXXXXXXXX</value>
- </property>
- </configuration>
- ```
-
-2. Restart FE; Doris will read and parse fe/conf/hive-site.xml, and then create the catalog via the `CREATE CATALOG` statement.
+### Use OSS with the HDFS service enabled (OSS-HDFS) to store data
+
+1. Confirm that the HDFS service is enabled for OSS. See [Enable and grant access to the OSS-HDFS service](https://help.aliyun.com/document_detail/419505.html?spm=a2c4g.2357115.0.i0).
+2. Download the SDK: [JindoData SDK download](https://github.com/aliyun/alibabacloud-jindodata/blob/master/docs/user/5.x/5.0.0-beta7/jindodata_download.md).
+3. Decompress the downloaded jindosdk.tar.gz, then copy `jindo-core.jar` and `jindo-sdk.jar` from its lib directory to both `${DORIS_HOME}/fe/lib` and `${DORIS_HOME}/be/lib/java_extensions`.
+4. Create a DLF Catalog and set `oss.hdfs.enabled` to `true`:
```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
+CREATE CATALOG dlf_oss_hdfs PROPERTIES (
"type"="hms",
- "hive.metastore.uris" = "thrift://127.0.0.1:9083"
-)
+ "hive.metastore.type" = "dlf",
+ "dlf.proxy.mode" = "DLF_ONLY",
+ "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+ "dlf.region" = "cn-beijing",
+ "dlf.uid" = "uid",
+ "dlf.access_key" = "ak",
+ "dlf.secret_key" = "sk",
+ "oss.hdfs.enabled" = "true"
+);
```
-`type` is fixed to `hms`. The value of `hive.metastore.uris` can be arbitrary since it is not actually used, but it must follow the standard Hive Metastore Thrift URI format.
-
+### Access DLF Iceberg tables
+
+```sql
+CREATE CATALOG dlf_iceberg PROPERTIES (
+ "type"="iceberg",
+ "iceberg.catalog.type" = "dlf",
+ "dlf.proxy.mode" = "DLF_ONLY",
+ "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+ "dlf.region" = "cn-beijing",
+ "dlf.uid" = "uid",
+ "dlf.access_key" = "ak",
+ "dlf.secret_key" = "sk"
+);
+```
+## Column type mapping
+Same as Hive Catalog; see the **column type mapping** section in [Hive Catalog](./hive.md).
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
index f72553b6d0..bf5333388b 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
@@ -53,7 +53,7 @@ CREATE CATALOG iceberg PROPERTIES (
### Create Catalog based on Iceberg API
-Use the Iceberg API to access metadata; services such as Hive, REST, and Glue are supported as Iceberg catalogs.
+Use the Iceberg API to access metadata; services such as Hive, REST, Glue, and DLF are supported as Iceberg catalogs.
#### Hive Metastore
@@ -85,6 +85,10 @@ CREATE CATALOG glue PROPERTIES (
For Iceberg property details, see [Iceberg Glue Catalog](https://iceberg.apache.org/docs/latest/aws/#glue-catalog)
+#### Alibaba Cloud DLF
+
+See [Alibaba Cloud DLF Catalog configuration](dlf.md).
+
#### REST Catalog
This method requires a REST service to be provided in advance, and users must implement the REST interface for obtaining Iceberg metadata.
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/max_compute.md
b/docs/zh-CN/docs/lakehouse/multi-catalog/max_compute.md
new file mode 100644
index 0000000000..e4b6eacb5f
--- /dev/null
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/max_compute.md
@@ -0,0 +1,60 @@
+---
+{
+ "title": "阿里云 Max Compute",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+# Alibaba Cloud MaxCompute
+
+MaxCompute is an enterprise-grade SaaS (Software as a Service) cloud data warehouse on Alibaba Cloud.
+
+> [What is MaxCompute](https://help.aliyun.com/zh/maxcompute/product-overview/what-is-maxcompute?spm=a2c4g.11174283.0.i1)
+
+## Connect to MaxCompute
+
+```sql
+CREATE CATALOG mc PROPERTIES (
+ "type" = "max_compute",
+ "mc.region" = "cn-beijing",
+ "mc.default.project" = "your-project",
+ "mc.access_key" = "ak",
+ "mc.secret_key" = "sk"
+);
+```
+
+* `mc.region`: The region where MaxCompute is activated. You can find the region in the endpoint; see [Endpoints](https://help.aliyun.com/zh/maxcompute/user-guide/endpoints?spm=a2c4g.11186623.0.0).
+* `mc.default.project`: The MaxCompute project, which you can create and manage in the [MaxCompute project list](https://maxcompute.console.aliyun.com/cn-beijing/project-list).
+* `mc.access_key`: AccessKey, which can be created and managed in the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `mc.secret_key`: SecretKey, which can be created and managed in the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `mc.public_access`: Setting `"mc.public_access"="true"` enables public network access; recommended for testing only.
+
+## Quotas
+
+When connecting to MaxCompute, the pay-as-you-go quota has limited query concurrency and usage. To increase resources, refer to the MaxCompute documentation; see [Manage quotas](https://help.aliyun.com/zh/maxcompute/user-guide/manage-quotas-in-the-new-maxcompute-console).
+
+## Column type mapping
+
+Same as Hive Catalog; see the **column type mapping** section in [Hive Catalog](./hive.md).
+
+
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index e3ae85461d..64c897c306 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -87,11 +87,23 @@ public class S3Util {
if (pos == -1) {
throw new RuntimeException("No '://' found in location: " +
location);
}
- location = "s3" + location.substring(pos);
+ if (isHdfsOnOssEndpoint(location)) {
+ // if hdfs service is enabled on oss, use oss location
+ // example:
oss://examplebucket.cn-shanghai.oss-dls.aliyuncs.com/dir/file/0000.orc
+ location = "oss" + location.substring(pos);
+ } else {
+ location = "s3" + location.substring(pos);
+ }
}
return new Path(location);
}
+ public static boolean isHdfsOnOssEndpoint(String location) {
+ // example: cn-shanghai.oss-dls.aliyuncs.com contains the
"oss-dls.aliyuncs".
+ //
https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen
+ return location.contains("oss-dls.aliyuncs");
+ }
+
public static S3Client buildS3Client(URI endpoint, String region,
CloudCredential credential) {
StaticCredentialsProvider scp;
AwsCredentials awsCredential;
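For reference, the rewritten branch above can be exercised in isolation. A minimal sketch follows; `SchemeRewriteSketch` and `convertLocation` are hypothetical names (the enclosing S3Util method is only partially shown in the hunk), and only the branch logic mirrors the patch:

```java
// Illustrative sketch only; convertLocation stands in for the partially-shown
// enclosing S3Util method.
public class SchemeRewriteSketch {
    // Same heuristic as S3Util.isHdfsOnOssEndpoint:
    // OSS-HDFS endpoints look like <region>.oss-dls.aliyuncs.com
    static boolean isHdfsOnOssEndpoint(String location) {
        return location.contains("oss-dls.aliyuncs");
    }

    static String convertLocation(String location) {
        int pos = location.indexOf("://");
        if (pos == -1) {
            throw new RuntimeException("No '://' found in location: " + location);
        }
        // OSS-HDFS locations keep the oss scheme so the HDFS-compatible Jindo
        // FileSystem serves them; other object-storage locations become s3.
        return (isHdfsOnOssEndpoint(location) ? "oss" : "s3") + location.substring(pos);
    }

    public static void main(String[] args) {
        // keeps the oss scheme (OSS-HDFS endpoint)
        System.out.println(convertLocation(
                "oss://examplebucket.cn-shanghai.oss-dls.aliyuncs.com/dir/file/0000.orc"));
        // normalized to s3://examplebucket/dir/file/0000.orc
        System.out.println(convertLocation("oss://examplebucket/dir/file/0000.orc"));
    }
}
```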
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
index 094871d9f5..8787233e7d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
@@ -17,6 +17,7 @@
package org.apache.doris.datasource.property;
+import org.apache.doris.common.util.S3Util;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.credentials.CloudCredential;
import org.apache.doris.datasource.credentials.CloudCredentialWithEndpoint;
@@ -253,6 +254,11 @@ public class PropertyConverter {
ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ENDPOINT_KEY,
endpoint);
ossProperties.put("fs.oss.impl.disable.cache", "true");
ossProperties.put("fs.oss.impl", AliyunOSSFileSystem.class.getName());
+ boolean hdfsEnabled =
Boolean.parseBoolean(props.getOrDefault(OssProperties.OSS_HDFS_ENABLED,
"false"));
+ if (S3Util.isHdfsOnOssEndpoint(endpoint) || hdfsEnabled) {
+ // rewrite when the endpoint already points to OSS-HDFS, or oss.hdfs.enabled is set
+ rewriteHdfsOnOssProperties(ossProperties, endpoint);
+ }
if (credential.isWhole()) {
ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ACCESS_KEY_ID,
credential.getAccessKey());
ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ACCESS_KEY_SECRET,
credential.getSecretKey());
@@ -268,6 +274,22 @@ public class PropertyConverter {
return ossProperties;
}
+ private static void rewriteHdfsOnOssProperties(Map<String, String>
ossProperties, String endpoint) {
+ if (!S3Util.isHdfsOnOssEndpoint(endpoint)) {
+ // just for robustness here, avoid wrong endpoint when oss-hdfs is
enabled.
+ // convert "oss-cn-beijing.aliyuncs.com" to
"cn-beijing.oss-dls.aliyuncs.com"
+ // reference link:
https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen
+ String[] endpointSplit = endpoint.split("\\.");
+ if (endpointSplit.length > 0) {
+ String region = endpointSplit[0].replace("oss-",
"").replace("-internal", "");
+
ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ENDPOINT_KEY,
+ region + ".oss-dls.aliyuncs.com");
+ }
+ }
+ ossProperties.put("fs.oss.impl",
"com.aliyun.emr.fs.oss.JindoOssFileSystem");
+ ossProperties.put("fs.AbstractFileSystem.oss.impl",
"com.aliyun.emr.fs.oss.OSS");
+ }
+
private static Map<String, String> convertToCOSProperties(Map<String,
String> props, CloudCredential credential) {
Map<String, String> cosProperties = Maps.newHashMap();
cosProperties.put(CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY,
props.get(CosProperties.ENDPOINT));
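The endpoint fix-up above can likewise be reduced to a few lines. A hedged standalone sketch; the class and method names are illustrative, and only the string manipulation is taken from `rewriteHdfsOnOssProperties`:

```java
// Hypothetical standalone version of the endpoint fix-up in
// rewriteHdfsOnOssProperties; class and method names are illustrative.
public class OssDlsEndpointSketch {
    static String toDlsEndpoint(String endpoint) {
        if (endpoint.contains("oss-dls.aliyuncs")) {
            return endpoint; // already an OSS-HDFS endpoint, leave it unchanged
        }
        String[] endpointSplit = endpoint.split("\\.");
        if (endpointSplit.length > 0) {
            // "oss-cn-beijing-internal" -> "cn-beijing"
            String region = endpointSplit[0].replace("oss-", "").replace("-internal", "");
            return region + ".oss-dls.aliyuncs.com";
        }
        return endpoint;
    }

    public static void main(String[] args) {
        // both print cn-beijing.oss-dls.aliyuncs.com
        System.out.println(toDlsEndpoint("oss-cn-beijing.aliyuncs.com"));
        System.out.println(toDlsEndpoint("oss-cn-beijing-internal.aliyuncs.com"));
    }
}
```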
@@ -368,6 +390,10 @@ public class PropertyConverter {
if (Strings.isNullOrEmpty(uid)) {
throw new IllegalArgumentException("Required dlf property: " +
DataLakeConfig.CATALOG_USER_ID);
}
+ if (!props.containsKey(DLFProperties.ENDPOINT)) {
+ // just display DLFProperties in catalog, and hide DataLakeConfig
properties
+ putNewPropertiesForCompatibility(props, credential);
+ }
// convert to oss property
if (credential.isWhole()) {
props.put(OssProperties.ACCESS_KEY, credential.getAccessKey());
@@ -379,11 +405,32 @@ public class PropertyConverter {
String publicAccess =
props.getOrDefault(DLFProperties.Site.ACCESS_PUBLIC, "false");
String region = props.getOrDefault(DataLakeConfig.CATALOG_REGION_ID,
props.get(DLFProperties.REGION));
if (!Strings.isNullOrEmpty(region)) {
- props.put(OssProperties.REGION, "oss-" + region);
- props.put(OssProperties.ENDPOINT, getOssEndpoint(region,
Boolean.parseBoolean(publicAccess)));
+ boolean hdfsEnabled =
Boolean.parseBoolean(props.getOrDefault(OssProperties.OSS_HDFS_ENABLED,
"false"));
+ if (hdfsEnabled) {
+ props.putIfAbsent("fs.oss.impl",
"com.aliyun.emr.fs.oss.JindoOssFileSystem");
+ props.putIfAbsent(OssProperties.REGION, region);
+ // example: cn-shanghai.oss-dls.aliyuncs.com
+ // from
https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen
+ props.putIfAbsent(OssProperties.ENDPOINT, region +
".oss-dls.aliyuncs.com");
+ } else {
+ props.putIfAbsent(OssProperties.REGION, "oss-" + region);
+ props.putIfAbsent(OssProperties.ENDPOINT,
getOssEndpoint(region, Boolean.parseBoolean(publicAccess)));
+ }
}
}
+ private static void putNewPropertiesForCompatibility(Map<String, String>
props, CloudCredential credential) {
+ props.put(DLFProperties.UID,
props.get(DataLakeConfig.CATALOG_USER_ID));
+ String endpoint = props.get(DataLakeConfig.CATALOG_ENDPOINT);
+ props.put(DLFProperties.ENDPOINT, endpoint);
+ props.put(DLFProperties.REGION,
props.getOrDefault(DataLakeConfig.CATALOG_REGION_ID,
+ S3Properties.getRegionOfEndpoint(endpoint)));
+ props.put(DLFProperties.PROXY_MODE,
props.getOrDefault(DataLakeConfig.CATALOG_PROXY_MODE, "DLF_ONLY"));
+ props.put(DLFProperties.ACCESS_KEY, credential.getAccessKey());
+ props.put(DLFProperties.SECRET_KEY, credential.getSecretKey());
+ props.put(DLFProperties.ACCESS_PUBLIC,
props.getOrDefault(DLFProperties.Site.ACCESS_PUBLIC, "false"));
+ }
+
private static String getOssEndpoint(String region, boolean publicAccess) {
String prefix = "http://oss-";
String suffix = ".aliyuncs.com";
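Taken together, the DLF conversion now selects the OSS endpoint and FileSystem implementation from `oss.hdfs.enabled`. A simplified sketch of that branch; `chooseOssProps` is a hypothetical helper, the key strings stand in for the `OssProperties` constants, and the real `getOssEndpoint` additionally distinguishes public and internal endpoints:

```java
import java.util.HashMap;
import java.util.Map;

// Illustrative reduction of the region/endpoint branch above; not part of the patch.
public class DlfOssPropsSketch {
    static Map<String, String> chooseOssProps(String region, boolean hdfsEnabled) {
        Map<String, String> props = new HashMap<>();
        if (hdfsEnabled) {
            // route reads through the Jindo FileSystem against the OSS-HDFS endpoint
            props.put("fs.oss.impl", "com.aliyun.emr.fs.oss.JindoOssFileSystem");
            props.put("oss.region", region);
            props.put("oss.endpoint", region + ".oss-dls.aliyuncs.com");
        } else {
            // plain OSS keeps the oss-<region> endpoint
            props.put("oss.region", "oss-" + region);
            props.put("oss.endpoint", "http://oss-" + region + ".aliyuncs.com");
        }
        return props;
    }

    public static void main(String[] args) {
        System.out.println(chooseOssProps("cn-beijing", true));
        System.out.println(chooseOssProps("cn-beijing", false));
    }
}
```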
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java
index 35c48c2730..210bc5814a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java
@@ -33,6 +33,7 @@ public class OssProperties extends BaseProperties {
public static final String ACCESS_KEY = "oss.access_key";
public static final String SECRET_KEY = "oss.secret_key";
public static final String SESSION_TOKEN = "oss.session_token";
+ public static final String OSS_HDFS_ENABLED = "oss.hdfs.enabled";
public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT,
ACCESS_KEY, SECRET_KEY);
public static CloudCredential getCredential(Map<String, String> props) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
index d366cbe90e..7a7dd76ab7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
@@ -404,6 +404,10 @@ public abstract class FileQueryScanNode extends
FileScanNode {
protected static Optional<TFileType> getTFileType(String location) {
if (location != null && !location.isEmpty()) {
if (S3Util.isObjStorage(location)) {
+ if (S3Util.isHdfsOnOssEndpoint(location)) {
+ // if hdfs service is enabled on oss, use hdfs lib to
access oss.
+ return Optional.of(TFileType.FILE_HDFS);
+ }
return Optional.of(TFileType.FILE_S3);
} else if (location.startsWith(FeConstants.FS_PREFIX_HDFS)) {
return Optional.of(TFileType.FILE_HDFS);
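The scan node thus routes OSS-HDFS locations through the HDFS file type. A self-contained sketch of the resulting dispatch; the enum stands in for Doris's `TFileType`, and `isObjStorage` is reduced to a scheme-prefix check for illustration:

```java
// Illustrative only; FileType stands in for TFileType, and isObjStorage is
// simplified to a scheme-prefix check.
public class FileTypeDispatchSketch {
    enum FileType { FILE_S3, FILE_HDFS }

    static boolean isObjStorage(String location) {
        return location.startsWith("s3://") || location.startsWith("oss://")
                || location.startsWith("cos://") || location.startsWith("obs://");
    }

    static boolean isHdfsOnOssEndpoint(String location) {
        return location.contains("oss-dls.aliyuncs");
    }

    static FileType getFileType(String location) {
        if (isObjStorage(location)) {
            // OSS-HDFS buckets are object storage but are read via the HDFS client
            return isHdfsOnOssEndpoint(location) ? FileType.FILE_HDFS : FileType.FILE_S3;
        }
        if (location.startsWith("hdfs://")) {
            return FileType.FILE_HDFS;
        }
        throw new IllegalArgumentException("Unhandled location: " + location);
    }

    public static void main(String[] args) {
        // FILE_HDFS
        System.out.println(getFileType("oss://bucket.cn-shanghai.oss-dls.aliyuncs.com/dir/0000.orc"));
        // FILE_S3
        System.out.println(getFileType("oss://bucket/dir/0000.orc"));
    }
}
```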
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]