This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new ad551b0e3a6 branch-3.0: [fix](s3) support chinacloudapi endpoint for
azure #47703 (#48642)
ad551b0e3a6 is described below
commit ad551b0e3a6cb230ba45e0ffd9bc77c7ed7a4257
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Mar 10 20:51:04 2025 +0800
branch-3.0: [fix](s3) support chinacloudapi endpoint for azure #47703
(#48642)
Cherry-picked from #47703
Co-authored-by: Kaijie Chen <[email protected]>
---
be/src/common/config.cpp | 2 +
be/src/common/config.h | 2 +
be/src/io/fs/azure_obj_storage_client.cpp | 14 ++++--
be/src/util/s3_util.cpp | 11 ++++-
cloud/src/common/config.h | 2 +
cloud/src/recycler/s3_accessor.cpp | 10 +++-
.../main/java/org/apache/doris/common/Config.java | 9 ++++
.../apache/doris/cloud/storage/AzureRemote.java | 15 +++---
.../property/constants/AzureProperties.java | 17 +++++--
.../org/apache/doris/fs/obj/AzureObjStorage.java | 15 +++---
.../doris/tablefunction/S3TableValuedFunction.java | 17 +++----
.../property/constants/AzurePropertiesTest.java | 55 ++++++++++++++++++++++
12 files changed, 134 insertions(+), 35 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index bdcadef7820..0ac96160f62 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1306,6 +1306,8 @@ DEFINE_Int32(spill_io_thread_pool_queue_size, "102400");
DEFINE_mBool(check_segment_when_build_rowset_meta, "false");
+DEFINE_mBool(force_azure_blob_global_endpoint, "false");
+
DEFINE_mInt32(max_s3_client_retry, "10");
DEFINE_mInt32(s3_read_base_wait_time_ms, "100");
DEFINE_mInt32(s3_read_max_wait_time_ms, "800");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 2edf21b17d0..79a8db32a8a 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1382,6 +1382,8 @@ DECLARE_mBool(check_segment_when_build_rowset_meta);
DECLARE_Int32(num_query_ctx_map_partitions);
+DECLARE_mBool(force_azure_blob_global_endpoint);
+
DECLARE_mBool(enable_s3_rate_limiter);
DECLARE_mInt64(s3_get_bucket_tokens);
DECLARE_mInt64(s3_get_token_per_second);
diff --git a/be/src/io/fs/azure_obj_storage_client.cpp
b/be/src/io/fs/azure_obj_storage_client.cpp
index bf2e370da6f..ee4b8f7ac89 100644
--- a/be/src/io/fs/azure_obj_storage_client.cpp
+++ b/be/src/io/fs/azure_obj_storage_client.cpp
@@ -80,7 +80,7 @@ auto s3_put_rate_limit(Func callback) -> decltype(callback())
{
return s3_rate_limit(doris::S3RateLimitType::PUT, std::move(callback));
}
-constexpr char SAS_TOKEN_URL_TEMPLATE[] =
"https://{}.blob.core.windows.net/{}/{}{}";
+constexpr char SAS_TOKEN_URL_TEMPLATE[] = "{}/{}/{}{}";
constexpr char BlobNotFound[] = "BlobNotFound";
} // namespace
@@ -416,6 +416,14 @@ std::string
AzureObjStorageClient::generate_presigned_url(const ObjectStoragePat
std::string sasToken = sas_builder.GenerateSasToken(
Azure::Storage::StorageSharedKeyCredential(conf.ak, conf.sk));
- return fmt::format(SAS_TOKEN_URL_TEMPLATE, conf.ak, conf.bucket, opts.key,
sasToken);
+ std::string endpoint = conf.endpoint;
+ if (doris::config::force_azure_blob_global_endpoint) {
+ endpoint = fmt::format("https://{}.blob.core.windows.net", conf.ak);
+ }
+ auto sasURL = fmt::format(SAS_TOKEN_URL_TEMPLATE, endpoint, conf.bucket,
opts.key, sasToken);
+ if (sasURL.find("://") == std::string::npos) {
+ sasURL = "https://" + sasURL;
+ }
+ return sasURL;
}
-} // namespace doris::io
\ No newline at end of file
+} // namespace doris::io
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index dece1074e60..c5937d7659f 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -254,8 +254,15 @@ std::shared_ptr<io::ObjStorageClient>
S3ClientFactory::_create_azure_client(
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(s3_conf.ak,
s3_conf.sk);
const std::string container_name = s3_conf.bucket;
- const std::string uri =
- fmt::format("{}://{}.blob.core.windows.net/{}", "https",
s3_conf.ak, container_name);
+ std::string uri;
+ if (config::force_azure_blob_global_endpoint) {
+ uri = fmt::format("https://{}.blob.core.windows.net/{}", s3_conf.ak,
container_name);
+ } else {
+ uri = fmt::format("{}/{}", s3_conf.endpoint, container_name);
+ if (s3_conf.endpoint.find("://") == std::string::npos) {
+ uri = "https://" + uri;
+ }
+ }
auto containerClient =
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred);
LOG_INFO("create one azure client with {}", s3_conf.to_string());
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index fbae4dbeddf..fd79bff8e16 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -228,6 +228,8 @@ CONF_Validator(s3_client_http_scheme, [](const std::string&
config) -> bool {
return config == "http" || config == "https";
});
+CONF_Bool(force_azure_blob_global_endpoint, "false");
+
// Max retry times for object storage request
CONF_mInt64(max_s3_client_retry, "10");
diff --git a/cloud/src/recycler/s3_accessor.cpp
b/cloud/src/recycler/s3_accessor.cpp
index 5356ddf38af..baf69df57e3 100644
--- a/cloud/src/recycler/s3_accessor.cpp
+++ b/cloud/src/recycler/s3_accessor.cpp
@@ -255,8 +255,14 @@ int S3Accessor::init() {
options.Retry.MaxRetries = config::max_s3_client_retry;
auto cred =
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(conf_.ak,
conf_.sk);
- uri_ = fmt::format("{}://{}.blob.core.windows.net/{}",
config::s3_client_http_scheme,
- conf_.ak, conf_.bucket);
+ if (config::force_azure_blob_global_endpoint) {
+ uri_ = fmt::format("https://{}.blob.core.windows.net/{}",
conf_.ak, conf_.bucket);
+ } else {
+ uri_ = fmt::format("{}/{}", conf_.endpoint, conf_.bucket);
+ if (uri_.find("://") == std::string::npos) {
+ uri_ = "https://" + uri_;
+ }
+ }
// In Azure's HTTP requests, all policies in the vector are called in
a chained manner following the HTTP pipeline approach.
// Within the RetryPolicy, the nextPolicy is called multiple times
inside a loop.
// All policies in the PerRetryPolicies are downstream of the
RetryPolicy.
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index cda1b914041..90a1951fc45 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -3138,6 +3138,15 @@ public class Config extends ConfigBase {
+ "for example: s3_load_endpoint_white_list=a,b,c"})
public static String[] s3_load_endpoint_white_list = {};
+ @ConfField(mutable = true, description = {
+ "此参数控制是否强制使用 Azure global endpoint。默认值为 false,系统将使用用户指定的 endpoint。"
+ + "如果设置为 true,系统将强制使用 {account}.blob.core.windows.net。",
+ "This parameter controls whether to force the use of the Azure
global endpoint. "
+ + "The default is false, meaning the system will use the
user-specified endpoint. "
+ + "If set to true, the system will force the use of
{account}.blob.core.windows.net."
+ })
+ public static boolean force_azure_blob_global_endpoint = false;
+
@ConfField(mutable = true, description = {"指定Jdbc driver url白名单, 举例:
jdbc_driver_url_white_list=a,b,c",
"the white list for jdbc driver url, if it is empty, no white list
will be set"
+ "for example: jdbc_driver_url_white_list=a,b,c"
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java
index 62026b47b87..86d5610bb0e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java
@@ -18,6 +18,7 @@
package org.apache.doris.cloud.storage;
import org.apache.doris.common.DdlException;
+import org.apache.doris.datasource.property.constants.AzureProperties;
import com.azure.core.credential.AccessToken;
import com.azure.core.credential.TokenCredential;
@@ -58,8 +59,6 @@ public class AzureRemote extends RemoteBase {
private static final Logger LOG = LogManager.getLogger(AzureRemote.class);
- private static final String URI_TEMPLATE =
"https://%s.blob.core.windows.net/%s";
-
private BlobContainerClient client;
public AzureRemote(ObjectInfo obj) {
@@ -72,8 +71,8 @@ public class AzureRemote extends RemoteBase {
BlobContainerClientBuilder builder = new
BlobContainerClientBuilder();
builder.credential(new StorageSharedKeyCredential(obj.getAk(),
obj.getSk()));
String containerName = obj.getBucket();
- String uri = String.format(URI_TEMPLATE, obj.getAk(),
- containerName);
+ String endpoint =
AzureProperties.formatAzureEndpoint(obj.getEndpoint(), obj.getAk());
+ String uri = endpoint + "/" + containerName;
builder.endpoint(uri);
BlobContainerClient containerClient = builder.buildClient();
@@ -134,8 +133,8 @@ public class AzureRemote extends RemoteBase {
BlobContainerClientBuilder builder = new
BlobContainerClientBuilder();
builder.credential(new StorageSharedKeyCredential(obj.getAk(),
obj.getSk()));
String containerName = obj.getBucket();
- String uri = String.format(URI_TEMPLATE, obj.getAk(),
- containerName);
+ String endpoint =
AzureProperties.formatAzureEndpoint(obj.getEndpoint(), obj.getAk());
+ String uri = endpoint + "/" + containerName;
builder.endpoint(uri);
BlobContainerClient containerClient = builder.buildClient();
BlobServiceClient blobServiceClient =
containerClient.getServiceClient();
@@ -229,8 +228,8 @@ public class AzureRemote extends RemoteBase {
builder.credential(new StorageSharedKeyCredential(obj.getAk(),
obj.getSk()));
}
String containerName = obj.getBucket();
- String uri = String.format(URI_TEMPLATE, obj.getAk(),
- containerName);
+ String endpoint =
AzureProperties.formatAzureEndpoint(obj.getEndpoint(), obj.getAk());
+ String uri = endpoint + "/" + containerName;
builder.endpoint(uri);
client = builder.buildClient();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
index daceda0bc35..70ba490ccfd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
@@ -17,7 +17,7 @@
package org.apache.doris.datasource.property.constants;
-
+import org.apache.doris.common.Config;
import org.apache.doris.common.credentials.CloudCredential;
import java.util.Arrays;
@@ -36,7 +36,7 @@ public class AzureProperties extends BaseProperties {
public static final String SESSION_TOKEN = "azure.session_token";
public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT,
ACCESS_KEY, SECRET_KEY);
- public static final String AZURE_ENDPOINT_TEMPLATE =
"%s.blob.core.windows.net/%s";
+ public static final String AZURE_ENDPOINT_TEMPLATE =
"https://%s.blob.core.windows.net";
public static class FS {
public static final String SESSION_TOKEN = "fs.azure.session.token";
@@ -47,7 +47,7 @@ public class AzureProperties extends BaseProperties {
return getCloudCredential(props, ACCESS_KEY, SECRET_KEY,
SESSION_TOKEN);
}
- public static Boolean checkAzureProviderPropertyExist(Map<String, String>
properties) {
+ public static boolean checkAzureProviderPropertyExist(Map<String, String>
properties) {
for (Map.Entry<String, String> entry : properties.entrySet()) {
if (entry.getKey().toLowerCase().contains(S3Properties.PROVIDER)
&&
entry.getValue().toUpperCase().equals(AzureProperties.AZURE_NAME)) {
@@ -56,4 +56,15 @@ public class AzureProperties extends BaseProperties {
}
return false;
}
+
+    /**
+     * Normalizes an Azure blob endpoint: uses https://{accountName}.blob.core.windows.net when forced by config
+     * or when endpoint is null/empty; otherwise returns endpoint, prefixing "https://" if it carries no scheme.
+     */
+    public static String formatAzureEndpoint(String endpoint, String accountName) {
+        if (Config.force_azure_blob_global_endpoint || endpoint == null || endpoint.isEmpty()) {
+            return String.format(AZURE_ENDPOINT_TEMPLATE, accountName);
+        }
+        return endpoint.contains("://") ? endpoint : "https://" + endpoint;
+    }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index cda78ba8773..e59bc6ac52e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -22,6 +22,7 @@ import org.apache.doris.common.DdlException;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.S3URI;
import org.apache.doris.datasource.property.PropertyConverter;
+import org.apache.doris.datasource.property.constants.AzureProperties;
import org.apache.doris.datasource.property.constants.S3Properties;
import org.apache.doris.fs.remote.RemoteFile;
@@ -61,7 +62,6 @@ import java.util.TreeMap;
public class AzureObjStorage implements ObjStorage<BlobServiceClient> {
private static final Logger LOG =
LogManager.getLogger(AzureObjStorage.class);
- private static final String URI_TEMPLATE =
"https://%s.blob.core.windows.net";
protected Map<String, String> properties;
private BlobServiceClient client;
private boolean isUsePathStyle = false;
@@ -99,7 +99,7 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
}
// Virtual hosted-style is recommended in the s3 protocol.
// The path-style has been abandoned, but for some unexplainable
reasons,
- // the s3 client will determine whether the endpiont starts with `s3`
+ // the s3 client will determine whether the endpoint starts with `s3`
// when generating a virtual hosted-sytle request.
// If not, it will not be converted (
https://github.com/aws/aws-sdk-java-v2/pull/763),
// but the endpoints of many cloud service providers for object
storage do not start with s3,
@@ -116,12 +116,14 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
@Override
public BlobServiceClient getClient() throws UserException {
if (client == null) {
- String uri = String.format(URI_TEMPLATE,
properties.get(S3Properties.ACCESS_KEY));
- StorageSharedKeyCredential cred = new
StorageSharedKeyCredential(properties.get(S3Properties.ACCESS_KEY),
+ final String accountName = properties.get(S3Properties.ACCESS_KEY);
+ final String endpoint = AzureProperties.formatAzureEndpoint(
+ properties.get(S3Properties.ENDPOINT), accountName);
+ StorageSharedKeyCredential cred = new
StorageSharedKeyCredential(accountName,
properties.get(S3Properties.SECRET_KEY));
BlobServiceClientBuilder builder = new BlobServiceClientBuilder();
builder.credential(cred);
- builder.endpoint(uri);
+ builder.endpoint(endpoint);
client = builder.buildClient();
}
return client;
@@ -389,7 +391,8 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
+ " failed because azure error: " + e.getMessage());
} catch (Exception e) {
LOG.warn("errors while glob file " + remotePath, e);
- st = new Status(Status.ErrCode.COMMON_ERROR, "errors while glob
file " + remotePath + e.getMessage());
+ st = new Status(Status.ErrCode.COMMON_ERROR,
+ "errors while glob file " + remotePath + ": " +
e.getMessage());
} finally {
long endTime = System.nanoTime();
long duration = endTime - startTime;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
index 3defb171a9f..56c438c303e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
@@ -118,21 +118,16 @@ public class S3TableValuedFunction extends
ExternalFileTableValuedFunction {
}
private String constructEndpoint(Map<String, String> properties, S3URI
s3uri) throws AnalysisException {
- String endpoint;
- if (!AzureProperties.checkAzureProviderPropertyExist(properties)) {
- // get endpoint first from properties, if not present, get it from
s3 uri.
- // If endpoint is missing, exception will be thrown.
- endpoint = getOrDefaultAndRemove(properties,
S3Properties.ENDPOINT, s3uri.getEndpoint().orElse(""));
- if (Strings.isNullOrEmpty(endpoint)) {
- throw new AnalysisException(String.format("Properties '%s' is
required.", S3Properties.ENDPOINT));
- }
- } else {
- String bucket = s3uri.getBucket();
+ // get endpoint first from properties, if not present, get it from s3
uri.
+ String endpoint = getOrDefaultAndRemove(properties,
S3Properties.ENDPOINT, s3uri.getEndpoint().orElse(""));
+ if (AzureProperties.checkAzureProviderPropertyExist(properties)) {
String accountName =
properties.getOrDefault(S3Properties.ACCESS_KEY, "");
if (accountName.isEmpty()) {
throw new AnalysisException(String.format("Properties '%s' is
required.", S3Properties.ACCESS_KEY));
}
- endpoint = String.format(AzureProperties.AZURE_ENDPOINT_TEMPLATE,
accountName, bucket);
+ endpoint = AzureProperties.formatAzureEndpoint(endpoint,
accountName);
+ } else if (Strings.isNullOrEmpty(endpoint)) {
+ throw new AnalysisException(String.format("Properties '%s' is
required.", S3Properties.ENDPOINT));
}
return endpoint;
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/constants/AzurePropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/constants/AzurePropertiesTest.java
new file mode 100644
index 00000000000..e155b5434d8
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/constants/AzurePropertiesTest.java
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.constants;
+
+import org.apache.doris.common.Config;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/** Unit tests for {@link AzureProperties#formatAzureEndpoint}; tests are instance methods so JUnit 5 runs them. */
+public class AzurePropertiesTest {
+    @Test
+    public void testFormatAzureEndpointGlobal() {
+        Config.force_azure_blob_global_endpoint = true;
+        String endpoint = AzureProperties.formatAzureEndpoint("ANY-ENDPOINT", "ak");
+        Assertions.assertEquals("https://ak.blob.core.windows.net", endpoint);
+    }
+
+    @Test
+    public void testFormatAzureEndpoint() {
+        Config.force_azure_blob_global_endpoint = false;
+        String endpoint = AzureProperties.formatAzureEndpoint("ak.blob.core.chinacloudapi.cn", "ANY-ACCOUNT");
+        Assertions.assertEquals("https://ak.blob.core.chinacloudapi.cn", endpoint);
+    }
+
+    @Test
+    public void testFormatAzureEndpointHTTPS() {
+        Config.force_azure_blob_global_endpoint = false;
+        String endpoint = AzureProperties.formatAzureEndpoint("https://ak.blob.core.chinacloudapi.cn", "ANY-ACCOUNT");
+        Assertions.assertEquals("https://ak.blob.core.chinacloudapi.cn", endpoint);
+    }
+
+    @Test
+    public void testFormatAzureEndpointHTTP() {
+        Config.force_azure_blob_global_endpoint = false;
+        String endpoint = AzureProperties.formatAzureEndpoint("http://ak.blob.core.chinacloudapi.cn", "ANY-ACCOUNT");
+        Assertions.assertEquals("http://ak.blob.core.chinacloudapi.cn", endpoint);
+    }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]