This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 9b375be9623 branch-3.0: [fix] let backup work on azure #46411 (#46445)
9b375be9623 is described below
commit 9b375be9623a0c226917165c90f9b75fcff88f96
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 10 12:11:42 2025 +0800
branch-3.0: [fix] let backup work on azure #46411 (#46445)
Cherry-picked from #46411
Co-authored-by: Yongqiang YANG <[email protected]>
---
be/src/io/fs/azure_obj_storage_client.cpp | 2 +-
be/src/util/s3_util.cpp | 4 +-
.../org/apache/doris/analysis/StorageBackend.java | 1 +
.../datasource/property/S3ClientBEProperties.java | 3 ++
.../org/apache/doris/fs/obj/AzureObjStorage.java | 46 ++++++++++++++++------
.../java/org/apache/doris/fs/obj/S3ObjStorage.java | 5 +++
.../apache/doris/fs/remote/AzureFileSystem.java | 5 +--
.../org/apache/doris/persist/gson/GsonUtils.java | 4 +-
.../datasource/property/PropertyConverterTest.java | 2 +-
.../apache/doris/fs/obj/AzureObjStorageTest.java | 27 ++++++++-----
10 files changed, 71 insertions(+), 28 deletions(-)
diff --git a/be/src/io/fs/azure_obj_storage_client.cpp
b/be/src/io/fs/azure_obj_storage_client.cpp
index 9f33db3400a..44d45077ebc 100644
--- a/be/src/io/fs/azure_obj_storage_client.cpp
+++ b/be/src/io/fs/azure_obj_storage_client.cpp
@@ -311,7 +311,7 @@ ObjectStorageResponse
AzureObjStorageClient::list_objects(const ObjectStoragePat
return _client->ListBlobs(list_opts);
});
get_file_file(resp);
- while (!resp.NextPageToken->empty()) {
+ while (resp.NextPageToken.HasValue()) {
list_opts.ContinuationToken = resp.NextPageToken;
resp = s3_get_rate_limit([&]() {
SCOPED_BVAR_LATENCY(s3_bvar::s3_list_latency);
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index f87389b6b3e..dece1074e60 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -254,8 +254,8 @@ std::shared_ptr<io::ObjStorageClient>
S3ClientFactory::_create_azure_client(
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(s3_conf.ak,
s3_conf.sk);
const std::string container_name = s3_conf.bucket;
- const std::string uri = fmt::format("{}://{}.blob.core.windows.net/{}",
- config::s3_client_http_scheme,
s3_conf.ak, container_name);
+ const std::string uri =
+ fmt::format("{}://{}.blob.core.windows.net/{}", "https",
s3_conf.ak, container_name);
auto containerClient =
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred);
LOG_INFO("create one azure client with {}", s3_conf.to_string());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
index b07725d2507..67a76cec450 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
@@ -178,6 +178,7 @@ public class StorageBackend implements ParseNode {
return TStorageBackendType.JFS;
case LOCAL:
return TStorageBackendType.LOCAL;
+ // deprecated
case AZURE:
return TStorageBackendType.AZURE;
default:
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
index 7d8c2668fea..093b74b80ae 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
@@ -85,6 +85,9 @@ public class S3ClientBEProperties {
if (properties.containsKey(PropertyConverter.USE_PATH_STYLE)) {
beProperties.put(PropertyConverter.USE_PATH_STYLE,
properties.get(PropertyConverter.USE_PATH_STYLE));
}
+ if (properties.containsKey(S3Properties.PROVIDER)) {
+ beProperties.put(S3Properties.PROVIDER,
properties.get(S3Properties.PROVIDER));
+ }
return beProperties;
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 780d2ab9fa3..0253993cc42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -36,6 +36,7 @@ import com.azure.storage.blob.BlobServiceClientBuilder;
import com.azure.storage.blob.batch.BlobBatch;
import com.azure.storage.blob.batch.BlobBatchClient;
import com.azure.storage.blob.batch.BlobBatchClientBuilder;
+import com.azure.storage.blob.models.BlobErrorCode;
import com.azure.storage.blob.models.BlobItem;
import com.azure.storage.blob.models.BlobProperties;
import com.azure.storage.blob.models.BlobStorageException;
@@ -53,6 +54,7 @@ import java.nio.file.FileSystems;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -196,6 +198,9 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
LOG.info("delete file " + remotePath + " success");
return Status.OK;
} catch (BlobStorageException e) {
+ if (e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) {
+ return Status.OK;
+ }
return new Status(
Status.ErrCode.COMMON_ERROR,
"get file from azure error: " + e.getServiceMessage());
@@ -331,6 +336,7 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
LOG.info("path pattern {}", pathPattern.toString());
PathMatcher matcher =
FileSystems.getDefault().getPathMatcher("glob:" + pathPattern.toString());
+ HashSet<String> directorySet = new HashSet<>();
String listPrefix = getLongestPrefix(globPath);
LOG.info("azure glob list prefix is {}", listPrefix);
ListBlobsOptions options = new
ListBlobsOptions().setPrefix(listPrefix);
@@ -343,18 +349,36 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
elementCnt++;
java.nio.file.Path blobPath =
Paths.get(blobItem.getName());
- if (!matcher.matches(blobPath)) {
- continue;
+ boolean isPrefix = false;
+ while
(blobPath.normalize().toString().startsWith(listPrefix)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("get blob {}",
blobPath.normalize().toString());
+ }
+ if (!matcher.matches(blobPath)) {
+ isPrefix = true;
+ blobPath = blobPath.getParent();
+ continue;
+ }
+ if
(directorySet.contains(blobPath.normalize().toString())) {
+ break;
+ }
+ if (isPrefix) {
+ directorySet.add(blobPath.normalize().toString());
+ }
+
+ matchCnt++;
+ RemoteFile remoteFile = new RemoteFile(
+ fileNameOnly ?
blobPath.getFileName().toString() : constructS3Path(blobPath.toString(),
+ uri.getBucket()),
+ !isPrefix,
+ isPrefix ? -1 :
blobItem.getProperties().getContentLength(),
+ isPrefix ? -1 :
blobItem.getProperties().getContentLength(),
+ isPrefix ? 0 :
blobItem.getProperties().getLastModified().getSecond());
+ result.add(remoteFile);
+
+ blobPath = blobPath.getParent();
+ isPrefix = true;
}
- matchCnt++;
- RemoteFile remoteFile = new RemoteFile(
- fileNameOnly ? blobPath.getFileName().toString() :
constructS3Path(blobPath.toString(),
- uri.getBucket()),
- !blobItem.isPrefix(),
- blobItem.isPrefix() ? -1 :
blobItem.getProperties().getContentLength(),
- blobItem.getProperties().getContentLength(),
-
blobItem.getProperties().getLastModified().getSecond());
- result.add(remoteFile);
}
newContinuationToken = pagedResponse.getContinuationToken();
} while (newContinuationToken != null);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 69a8ac4d794..edcb54bf8fa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -102,6 +102,11 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
.equalsIgnoreCase("true");
forceParsingByStandardUri =
this.properties.getOrDefault(PropertyConverter.FORCE_PARSING_BY_STANDARD_URI,
"false").equalsIgnoreCase("true");
+
+ String endpoint = this.properties.get(S3Properties.ENDPOINT);
+ String region = this.properties.get(S3Properties.REGION);
+
+ this.properties.put(S3Properties.REGION,
PropertyConverter.checkRegion(endpoint, region, S3Properties.REGION));
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
index 5004cfd2f12..c116182d3a4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
@@ -17,7 +17,6 @@
package org.apache.doris.fs.remote;
-import org.apache.doris.analysis.StorageBackend;
import org.apache.doris.analysis.StorageBackend.StorageType;
import org.apache.doris.backup.Status;
import org.apache.doris.common.UserException;
@@ -35,13 +34,13 @@ public class AzureFileSystem extends ObjFileSystem {
private static final Logger LOG =
LogManager.getLogger(AzureFileSystem.class);
public AzureFileSystem(Map<String, String> properties) {
- super(StorageType.AZURE.name(), StorageType.AZURE, new
AzureObjStorage(properties));
+ super(StorageType.AZURE.name(), StorageType.S3, new
AzureObjStorage(properties));
initFsProperties();
}
@VisibleForTesting
public AzureFileSystem(AzureObjStorage storage) {
- super(StorageBackend.StorageType.AZURE.name(),
StorageBackend.StorageType.AZURE, storage);
+ super(StorageType.AZURE.name(), StorageType.S3, storage);
initFsProperties();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
index ee6f2f74eac..ec25bfa134d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
@@ -172,6 +172,7 @@ import
org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalCatalog;
import
org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalDatabase;
import org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalTable;
import org.apache.doris.fs.PersistentFileSystem;
+import org.apache.doris.fs.remote.AzureFileSystem;
import org.apache.doris.fs.remote.BrokerFileSystem;
import org.apache.doris.fs.remote.ObjFileSystem;
import org.apache.doris.fs.remote.S3FileSystem;
@@ -566,7 +567,8 @@ public class GsonUtils {
.registerSubtype(JFSFileSystem.class,
JFSFileSystem.class.getSimpleName())
.registerSubtype(OFSFileSystem.class,
OFSFileSystem.class.getSimpleName())
.registerSubtype(ObjFileSystem.class,
ObjFileSystem.class.getSimpleName())
- .registerSubtype(S3FileSystem.class,
S3FileSystem.class.getSimpleName());
+ .registerSubtype(S3FileSystem.class,
S3FileSystem.class.getSimpleName())
+ .registerSubtype(AzureFileSystem.class,
AzureFileSystem.class.getSimpleName());
private static
RuntimeTypeAdapterFactory<org.apache.doris.backup.AbstractJob>
jobBackupTypeAdapterFactory
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
index 8967ca5fae0..eacd0bacbb3 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
@@ -195,7 +195,7 @@ public class PropertyConverterTest extends
TestWithFeService {
CreateRepositoryStmt analyzedStmtNew = createStmt(s3RepoNew);
Assertions.assertEquals(analyzedStmtNew.getProperties().size(), 3);
Repository repositoryNew = getRepository(analyzedStmtNew,
"s3_repo_new");
-
Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(),
4);
+
Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(),
5);
}
private static Repository getRepository(CreateRepositoryStmt analyzedStmt,
String name) throws DdlException {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
index 41f49b7eab1..f8869db9cf2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
@@ -42,6 +42,7 @@ import java.nio.file.Paths;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -58,19 +59,20 @@ public class AzureObjStorageTest {
}
public static List<I> genInputs() {
+ // refer genObjectKeys
List<I> inputs = new ArrayList<I>();
inputs.add(new I("s3://gavin-test-jp/azure-test/1/*/tmp*", 8196L));
inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp*", 4098L));
inputs.add(new I("s3://gavin-test-jp/azure-test/1/*tmp*", 4098L));
inputs.add(new I("s3://gavin-test-jp/azure-test/1/**/tmp*", 20490L));
inputs.add(new I("s3://gavin-test-jp/azure-test/**/tmp*", 32784L));
- inputs.add(new I("s3://gavin-test-jp/azure-test/*", 0L)); // no files
at 1st level
+ inputs.add(new I("s3://gavin-test-jp/azure-test/*", 3L)); // no files
at 1st level
inputs.add(new I("s3://gavin-test-jp/azure-test/2/*", 4098L));
inputs.add(new I("s3://gavin-test-jp/azure-test/2*/*", 4098L));
inputs.add(new I("s3://gavin-test-jp/azure-test/2/*I*", 591L));
- inputs.add(new I("s3://gavin-test-jp/azure-test/1", 0L));
- inputs.add(new I("s3://gavin-test-jp/azure-test/2", 0L));
- inputs.add(new I("s3://gavin-test-jp/azure-test/3", 0L));
+ inputs.add(new I("s3://gavin-test-jp/azure-test/1", 1L));
+ inputs.add(new I("s3://gavin-test-jp/azure-test/2", 1L));
+ inputs.add(new I("s3://gavin-test-jp/azure-test/3", 1L));
inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp.k*", 61L));
inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp.[a-z]*", 1722L));
inputs.add(new I("s3://gavin-test-jp/azure-test/[12]/tmp.[a-z]*",
3444L));
@@ -120,13 +122,13 @@ public class AzureObjStorageTest {
boolean fileNameOnly = false;
// FIXME(gavin): Mock the result returned from azure blob to make
this UT work when no aksk and network
Status st = azs.globList(i.pattern, result, fileNameOnly);
+ System.out.println("testGlobListWithMockedAzureStorage pattern: "
+ i.pattern + " matched " + result.size());
Assertions.assertTrue(st.ok());
Assertions.assertEquals(i.expectedMatchSize, result.size());
for (int j = 0; j < result.size() && j < 10; ++j) {
System.out.println(result.get(j).getName());
}
- System.out.println("pattern: " + i.pattern + " matched " +
result.size());
- System.out.println("====================");
+
});
}
@@ -136,13 +138,20 @@ public class AzureObjStorageTest {
String pattern = i.pattern.substring(19); // remove prefix
s3://gavin-test-jp/
PathMatcher matcher =
FileSystems.getDefault().getPathMatcher("glob:" + pattern);
List<String> matchedPaths = new ArrayList<>();
+ HashSet<String> directories = new HashSet<>();
for (String p : genObjectKeys()) {
java.nio.file.Path blobPath = Paths.get(p);
- if (!matcher.matches(blobPath)) {
- continue;
+
+ while (blobPath != null) {
+ if (matcher.matches(blobPath) &&
!directories.contains(blobPath.toString())) {
+ matchedPaths.add(blobPath.toString());
+ directories.add(blobPath.toString());
+ }
+ blobPath = blobPath.getParent();
}
- matchedPaths.add(p);
}
+ System.out.println("pattern: " + i.pattern + " matched " +
matchedPaths.size());
+ System.out.println("====================");
Assertions.assertEquals(i.expectedMatchSize, matchedPaths.size());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]