This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 9b375be9623 branch-3.0: [fix] let backup work on azure #46411 (#46445)
9b375be9623 is described below

commit 9b375be9623a0c226917165c90f9b75fcff88f96
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 10 12:11:42 2025 +0800

    branch-3.0: [fix] let backup work on azure #46411 (#46445)
    
    Cherry-picked from #46411
    
    Co-authored-by: Yongqiang YANG <[email protected]>
---
 be/src/io/fs/azure_obj_storage_client.cpp          |  2 +-
 be/src/util/s3_util.cpp                            |  4 +-
 .../org/apache/doris/analysis/StorageBackend.java  |  1 +
 .../datasource/property/S3ClientBEProperties.java  |  3 ++
 .../org/apache/doris/fs/obj/AzureObjStorage.java   | 46 ++++++++++++++++------
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java |  5 +++
 .../apache/doris/fs/remote/AzureFileSystem.java    |  5 +--
 .../org/apache/doris/persist/gson/GsonUtils.java   |  4 +-
 .../datasource/property/PropertyConverterTest.java |  2 +-
 .../apache/doris/fs/obj/AzureObjStorageTest.java   | 27 ++++++++-----
 10 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/be/src/io/fs/azure_obj_storage_client.cpp 
b/be/src/io/fs/azure_obj_storage_client.cpp
index 9f33db3400a..44d45077ebc 100644
--- a/be/src/io/fs/azure_obj_storage_client.cpp
+++ b/be/src/io/fs/azure_obj_storage_client.cpp
@@ -311,7 +311,7 @@ ObjectStorageResponse 
AzureObjStorageClient::list_objects(const ObjectStoragePat
                     return _client->ListBlobs(list_opts);
                 });
                 get_file_file(resp);
-                while (!resp.NextPageToken->empty()) {
+                while (resp.NextPageToken.HasValue()) {
                     list_opts.ContinuationToken = resp.NextPageToken;
                     resp = s3_get_rate_limit([&]() {
                         SCOPED_BVAR_LATENCY(s3_bvar::s3_list_latency);
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index f87389b6b3e..dece1074e60 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -254,8 +254,8 @@ std::shared_ptr<io::ObjStorageClient> 
S3ClientFactory::_create_azure_client(
             
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(s3_conf.ak, 
s3_conf.sk);
 
     const std::string container_name = s3_conf.bucket;
-    const std::string uri = fmt::format("{}://{}.blob.core.windows.net/{}",
-                                        config::s3_client_http_scheme, 
s3_conf.ak, container_name);
+    const std::string uri =
+            fmt::format("{}://{}.blob.core.windows.net/{}", "https", 
s3_conf.ak, container_name);
 
     auto containerClient = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred);
     LOG_INFO("create one azure client with {}", s3_conf.to_string());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
index b07725d2507..67a76cec450 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
@@ -178,6 +178,7 @@ public class StorageBackend implements ParseNode {
                     return TStorageBackendType.JFS;
                 case LOCAL:
                     return TStorageBackendType.LOCAL;
+                // deprecated
                 case AZURE:
                     return TStorageBackendType.AZURE;
                 default:
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
index 7d8c2668fea..093b74b80ae 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
@@ -85,6 +85,9 @@ public class S3ClientBEProperties {
         if (properties.containsKey(PropertyConverter.USE_PATH_STYLE)) {
             beProperties.put(PropertyConverter.USE_PATH_STYLE, 
properties.get(PropertyConverter.USE_PATH_STYLE));
         }
+        if (properties.containsKey(S3Properties.PROVIDER)) {
+            beProperties.put(S3Properties.PROVIDER, 
properties.get(S3Properties.PROVIDER));
+        }
         return beProperties;
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 780d2ab9fa3..0253993cc42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -36,6 +36,7 @@ import com.azure.storage.blob.BlobServiceClientBuilder;
 import com.azure.storage.blob.batch.BlobBatch;
 import com.azure.storage.blob.batch.BlobBatchClient;
 import com.azure.storage.blob.batch.BlobBatchClientBuilder;
+import com.azure.storage.blob.models.BlobErrorCode;
 import com.azure.storage.blob.models.BlobItem;
 import com.azure.storage.blob.models.BlobProperties;
 import com.azure.storage.blob.models.BlobStorageException;
@@ -53,6 +54,7 @@ import java.nio.file.FileSystems;
 import java.nio.file.PathMatcher;
 import java.nio.file.Paths;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
@@ -196,6 +198,9 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
             LOG.info("delete file " + remotePath + " success");
             return Status.OK;
         } catch (BlobStorageException e) {
+            if (e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) {
+                return Status.OK;
+            }
             return new Status(
                     Status.ErrCode.COMMON_ERROR,
                     "get file from azure error: " + e.getServiceMessage());
@@ -331,6 +336,7 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
             LOG.info("path pattern {}", pathPattern.toString());
             PathMatcher matcher = 
FileSystems.getDefault().getPathMatcher("glob:" + pathPattern.toString());
 
+            HashSet<String> directorySet = new HashSet<>();
             String listPrefix = getLongestPrefix(globPath);
             LOG.info("azure glob list prefix is {}", listPrefix);
             ListBlobsOptions options = new 
ListBlobsOptions().setPrefix(listPrefix);
@@ -343,18 +349,36 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
                     elementCnt++;
                     java.nio.file.Path blobPath = 
Paths.get(blobItem.getName());
 
-                    if (!matcher.matches(blobPath)) {
-                        continue;
+                    boolean isPrefix = false;
+                    while 
(blobPath.normalize().toString().startsWith(listPrefix)) {
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("get blob {}", 
blobPath.normalize().toString());
+                        }
+                        if (!matcher.matches(blobPath)) {
+                            isPrefix = true;
+                            blobPath = blobPath.getParent();
+                            continue;
+                        }
+                        if 
(directorySet.contains(blobPath.normalize().toString())) {
+                            break;
+                        }
+                        if (isPrefix) {
+                            directorySet.add(blobPath.normalize().toString());
+                        }
+
+                        matchCnt++;
+                        RemoteFile remoteFile = new RemoteFile(
+                                fileNameOnly ? 
blobPath.getFileName().toString() : constructS3Path(blobPath.toString(),
+                                        uri.getBucket()),
+                                !isPrefix,
+                                isPrefix ? -1 : 
blobItem.getProperties().getContentLength(),
+                                isPrefix ? -1 : 
blobItem.getProperties().getContentLength(),
+                                isPrefix ? 0 : 
blobItem.getProperties().getLastModified().getSecond());
+                        result.add(remoteFile);
+
+                        blobPath = blobPath.getParent();
+                        isPrefix = true;
                     }
-                    matchCnt++;
-                    RemoteFile remoteFile = new RemoteFile(
-                            fileNameOnly ? blobPath.getFileName().toString() : 
constructS3Path(blobPath.toString(),
-                                    uri.getBucket()),
-                            !blobItem.isPrefix(),
-                            blobItem.isPrefix() ? -1 : 
blobItem.getProperties().getContentLength(),
-                            blobItem.getProperties().getContentLength(),
-                            
blobItem.getProperties().getLastModified().getSecond());
-                    result.add(remoteFile);
                 }
                 newContinuationToken = pagedResponse.getContinuationToken();
             } while (newContinuationToken != null);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 69a8ac4d794..edcb54bf8fa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -102,6 +102,11 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
                 .equalsIgnoreCase("true");
         forceParsingByStandardUri = 
this.properties.getOrDefault(PropertyConverter.FORCE_PARSING_BY_STANDARD_URI,
                 "false").equalsIgnoreCase("true");
+
+        String endpoint = this.properties.get(S3Properties.ENDPOINT);
+        String region = this.properties.get(S3Properties.REGION);
+
+        this.properties.put(S3Properties.REGION, 
PropertyConverter.checkRegion(endpoint, region, S3Properties.REGION));
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
index 5004cfd2f12..c116182d3a4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.fs.remote;
 
-import org.apache.doris.analysis.StorageBackend;
 import org.apache.doris.analysis.StorageBackend.StorageType;
 import org.apache.doris.backup.Status;
 import org.apache.doris.common.UserException;
@@ -35,13 +34,13 @@ public class AzureFileSystem extends ObjFileSystem {
     private static final Logger LOG = 
LogManager.getLogger(AzureFileSystem.class);
 
     public AzureFileSystem(Map<String, String> properties) {
-        super(StorageType.AZURE.name(), StorageType.AZURE, new 
AzureObjStorage(properties));
+        super(StorageType.AZURE.name(), StorageType.S3, new 
AzureObjStorage(properties));
         initFsProperties();
     }
 
     @VisibleForTesting
     public AzureFileSystem(AzureObjStorage storage) {
-        super(StorageBackend.StorageType.AZURE.name(), 
StorageBackend.StorageType.AZURE, storage);
+        super(StorageType.AZURE.name(), StorageType.S3, storage);
         initFsProperties();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
index ee6f2f74eac..ec25bfa134d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
@@ -172,6 +172,7 @@ import 
org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalCatalog;
 import 
org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalDatabase;
 import org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalTable;
 import org.apache.doris.fs.PersistentFileSystem;
+import org.apache.doris.fs.remote.AzureFileSystem;
 import org.apache.doris.fs.remote.BrokerFileSystem;
 import org.apache.doris.fs.remote.ObjFileSystem;
 import org.apache.doris.fs.remote.S3FileSystem;
@@ -566,7 +567,8 @@ public class GsonUtils {
             .registerSubtype(JFSFileSystem.class, 
JFSFileSystem.class.getSimpleName())
             .registerSubtype(OFSFileSystem.class, 
OFSFileSystem.class.getSimpleName())
             .registerSubtype(ObjFileSystem.class, 
ObjFileSystem.class.getSimpleName())
-            .registerSubtype(S3FileSystem.class, 
S3FileSystem.class.getSimpleName());
+            .registerSubtype(S3FileSystem.class, 
S3FileSystem.class.getSimpleName())
+            .registerSubtype(AzureFileSystem.class, 
AzureFileSystem.class.getSimpleName());
 
     private static 
RuntimeTypeAdapterFactory<org.apache.doris.backup.AbstractJob>
             jobBackupTypeAdapterFactory
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
index 8967ca5fae0..eacd0bacbb3 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
@@ -195,7 +195,7 @@ public class PropertyConverterTest extends 
TestWithFeService {
         CreateRepositoryStmt analyzedStmtNew = createStmt(s3RepoNew);
         Assertions.assertEquals(analyzedStmtNew.getProperties().size(), 3);
         Repository repositoryNew = getRepository(analyzedStmtNew, 
"s3_repo_new");
-        
Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(),
 4);
+        
Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(),
 5);
     }
 
     private static Repository getRepository(CreateRepositoryStmt analyzedStmt, 
String name) throws DdlException {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
index 41f49b7eab1..f8869db9cf2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
@@ -42,6 +42,7 @@ import java.nio.file.Paths;
 import java.time.OffsetDateTime;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -58,19 +59,20 @@ public class AzureObjStorageTest {
     }
 
     public static List<I> genInputs() {
+        // refer genObjectKeys
         List<I> inputs = new ArrayList<I>();
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/*/tmp*", 8196L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/*tmp*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/**/tmp*", 20490L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/**/tmp*", 32784L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/*", 0L)); // no files 
at 1st level
+        inputs.add(new I("s3://gavin-test-jp/azure-test/*", 3L)); // no files 
at 1st level
         inputs.add(new I("s3://gavin-test-jp/azure-test/2/*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/2*/*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/2/*I*", 591L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/1", 0L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/2", 0L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/3", 0L));
+        inputs.add(new I("s3://gavin-test-jp/azure-test/1", 1L));
+        inputs.add(new I("s3://gavin-test-jp/azure-test/2", 1L));
+        inputs.add(new I("s3://gavin-test-jp/azure-test/3", 1L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp.k*", 61L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp.[a-z]*", 1722L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/[12]/tmp.[a-z]*", 
3444L));
@@ -120,13 +122,13 @@ public class AzureObjStorageTest {
             boolean fileNameOnly = false;
             // FIXME(gavin): Mock the result returned from azure blob to make 
this UT work when no aksk and network
             Status st = azs.globList(i.pattern, result, fileNameOnly);
+            System.out.println("testGlobListWithMockedAzureStorage pattern: " 
+ i.pattern + " matched " + result.size());
             Assertions.assertTrue(st.ok());
             Assertions.assertEquals(i.expectedMatchSize, result.size());
             for (int j = 0; j < result.size() && j < 10; ++j) {
                 System.out.println(result.get(j).getName());
             }
-            System.out.println("pattern: " + i.pattern + " matched " + 
result.size());
-            System.out.println("====================");
+
         });
     }
 
@@ -136,13 +138,20 @@ public class AzureObjStorageTest {
             String pattern = i.pattern.substring(19); // remove prefix 
s3://gavin-test-jp/
             PathMatcher matcher = 
FileSystems.getDefault().getPathMatcher("glob:" + pattern);
             List<String> matchedPaths = new ArrayList<>();
+            HashSet<String> directories = new HashSet<>();
             for (String p : genObjectKeys()) {
                 java.nio.file.Path blobPath = Paths.get(p);
-                if (!matcher.matches(blobPath)) {
-                    continue;
+
+                while (blobPath != null) {
+                    if (matcher.matches(blobPath) && 
!directories.contains(blobPath.toString())) {
+                        matchedPaths.add(blobPath.toString());
+                        directories.add(blobPath.toString());
+                    }
+                    blobPath = blobPath.getParent();
                 }
-                matchedPaths.add(p);
             }
+            System.out.println("pattern: " + i.pattern + " matched " + 
matchedPaths.size());
+            System.out.println("====================");
             Assertions.assertEquals(i.expectedMatchSize, matchedPaths.size());
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to