This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new ea6ba40ce1e Add support for Azure Goverment storage (#15523)
ea6ba40ce1e is described below
commit ea6ba40ce1e1b776a85378b1c0f495483e45490a
Author: Misha <[email protected]>
AuthorDate: Tue Jan 9 18:03:32 2024 +0100
Add support for Azure Goverment storage (#15523)
Added support for Azure Government storage in Druid Azure-Extensions. This
enhancement allows the Azure-Extensions to be compatible with different Azure
storage types by updating the endpoint suffix from a hardcoded value to a
configurable one.
---
docs/development/extensions-core/azure.md | 1 +
.../druid/storage/azure/AzureAccountConfig.java | 19 +++++++++++++++++++
.../druid/storage/azure/AzureClientFactory.java | 2 +-
.../druid/storage/azure/AzureDataSegmentPuller.java | 9 +++++++--
.../druid/storage/azure/AzureDataSegmentPusher.java | 2 +-
.../druid/storage/azure/AzureStorageDruidModule.java | 4 ++--
.../org/apache/druid/storage/azure/AzureUtils.java | 9 +++++----
.../druid/storage/azure/AzureClientFactoryTest.java | 1 +
.../storage/azure/AzureDataSegmentPullerTest.java | 12 ++++++++----
.../storage/azure/AzureStorageDruidModuleTest.java | 20 ++++++++++++++++++++
.../apache/druid/storage/azure/AzureUtilsTest.java | 18 ++++++++++++++++--
11 files changed, 81 insertions(+), 16 deletions(-)
diff --git a/docs/development/extensions-core/azure.md
b/docs/development/extensions-core/azure.md
index 198ef3650dc..003f39cc554 100644
--- a/docs/development/extensions-core/azure.md
+++ b/docs/development/extensions-core/azure.md
@@ -42,5 +42,6 @@ To use this Apache Druid extension,
[include](../../configuration/extensions.md#
|`druid.azure.protocol`|the protocol to use|http or https|https|
|`druid.azure.maxTries`|Number of tries before canceling an Azure operation.|
|3|
|`druid.azure.maxListingLength`|maximum number of input files matching a given
prefix to retrieve at a time| |1024|
+|`druid.azure.endpointSuffix`|The endpoint suffix to use. Override the default
value to connect to [Azure
Government](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api).|Examples:
`core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`|
See [Azure Services](http://azure.microsoft.com/en-us/pricing/free-trial/) for
more information.
diff --git
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java
index 5826c68f1b7..97ec3c625b5 100644
---
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java
+++
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java
@@ -52,6 +52,9 @@ public class AzureAccountConfig
@JsonProperty
private Boolean useAzureCredentialsChain = Boolean.FALSE;
+ @JsonProperty
+ private String endpointSuffix = AzureUtils.DEFAULT_AZURE_ENDPOINT_SUFFIX;
+
@SuppressWarnings("unused") // Used by Jackson deserialization?
public void setProtocol(String protocol)
{
@@ -75,6 +78,12 @@ public class AzureAccountConfig
this.key = key;
}
+ @SuppressWarnings("unused") // Used by Jackson deserialization?
+ public void setEndpointSuffix(String endpointSuffix)
+ {
+ this.endpointSuffix = endpointSuffix;
+ }
+
public String getProtocol()
{
return protocol;
@@ -121,4 +130,14 @@ public class AzureAccountConfig
{
this.useAzureCredentialsChain = useAzureCredentialsChain;
}
+
+ public String getEndpointSuffix()
+ {
+ return endpointSuffix;
+ }
+
+ public String getBlobStorageEndpoint()
+ {
+ return "blob." + endpointSuffix;
+ }
}
diff --git
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureClientFactory.java
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureClientFactory.java
index 365b9d40dab..3625eaa813a 100644
---
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureClientFactory.java
+++
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureClientFactory.java
@@ -67,7 +67,7 @@ public class AzureClientFactory
private BlobServiceClientBuilder getAuthenticatedBlobServiceClientBuilder()
{
BlobServiceClientBuilder clientBuilder = new BlobServiceClientBuilder()
- .endpoint("https://" + config.getAccount() + ".blob.core.windows.net");
+ .endpoint("https://" + config.getAccount() + "." +
config.getBlobStorageEndpoint());
if (config.getKey() != null) {
clientBuilder.credential(new
StorageSharedKeyCredential(config.getAccount(), config.getKey()));
diff --git
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java
index 571ecc68350..c20413b1169 100644
---
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java
+++
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java
@@ -38,11 +38,16 @@ public class AzureDataSegmentPuller
private final AzureByteSourceFactory byteSourceFactory;
+ private final AzureAccountConfig azureAccountConfig;
+
@Inject
public AzureDataSegmentPuller(
- AzureByteSourceFactory byteSourceFactory)
+ AzureByteSourceFactory byteSourceFactory,
+ AzureAccountConfig azureAccountConfig
+ )
{
this.byteSourceFactory = byteSourceFactory;
+ this.azureAccountConfig = azureAccountConfig;
}
FileUtils.FileCopyResult getSegmentFiles(
@@ -59,7 +64,7 @@ public class AzureDataSegmentPuller
"Loading container: [%s], with blobPath: [%s] and outDir: [%s]",
containerName, blobPath, outDir
);
- final String actualBlobPath =
AzureUtils.maybeRemoveAzurePathPrefix(blobPath);
+ final String actualBlobPath =
AzureUtils.maybeRemoveAzurePathPrefix(blobPath,
azureAccountConfig.getBlobStorageEndpoint());
final ByteSource byteSource = byteSourceFactory.create(containerName,
actualBlobPath);
final FileUtils.FileCopyResult result = CompressionUtils.unzip(
diff --git
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java
index e2ce8bbb88d..8180b362b66 100644
---
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java
+++
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java
@@ -78,7 +78,7 @@ public class AzureDataSegmentPusher implements
DataSegmentPusher
AzureUtils.AZURE_STORAGE_HADOOP_PROTOCOL,
segmentConfig.getContainer(),
accountConfig.getAccount(),
- AzureUtils.AZURE_STORAGE_HOST_ADDRESS,
+ accountConfig.getBlobStorageEndpoint(),
prefixIsNullOrEmpty ? "" :
StringUtils.maybeRemoveTrailingSlash(prefix) + '/'
);
diff --git
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java
index ec4ed8fa201..25db821601b 100644
---
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java
+++
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java
@@ -47,9 +47,9 @@ public class AzureStorageDruidModule implements DruidModule
public static final String SCHEME = "azure";
public static final String
- STORAGE_CONNECTION_STRING_WITH_KEY =
"DefaultEndpointsProtocol=%s;AccountName=%s;AccountKey=%s";
+ STORAGE_CONNECTION_STRING_WITH_KEY =
"DefaultEndpointsProtocol=%s;AccountName=%s;AccountKey=%s;EndpointSuffix=%s;";
public static final String
- STORAGE_CONNECTION_STRING_WITH_TOKEN =
"DefaultEndpointsProtocol=%s;AccountName=%s;SharedAccessSignature=%s";
+ STORAGE_CONNECTION_STRING_WITH_TOKEN =
"DefaultEndpointsProtocol=%s;AccountName=%s;SharedAccessSignature=%s;EndpointSuffix=%s;";
public static final String INDEX_ZIP_FILE_NAME = "index.zip";
@Override
diff --git
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java
index 5309b016d5e..0c00a633d77 100644
---
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java
+++
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java
@@ -38,11 +38,12 @@ import java.util.concurrent.TimeoutException;
public class AzureUtils
{
+ public static final String DEFAULT_AZURE_ENDPOINT_SUFFIX =
"core.windows.net";
@VisibleForTesting
static final String AZURE_STORAGE_HOST_ADDRESS = "blob.core.windows.net";
// The azure storage hadoop access pattern is:
- // wasb[s]://<containername>@<accountname>.blob.core.windows.net/<path>
+ // wasb[s]://<containername>@<accountname>.blob.<endpointSuffix>/<path>
// (from
https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-use-blob-storage)
static final String AZURE_STORAGE_HADOOP_PROTOCOL = "wasbs";
@@ -88,14 +89,14 @@ public class AzureUtils
* @return a String representing the blob path component of the uri with any
leading 'blob.core.windows.net/' string
* removed characters removed.
*/
- public static String maybeRemoveAzurePathPrefix(String blobPath)
+ public static String maybeRemoveAzurePathPrefix(String blobPath, String
blobStorageEndpointSuffix)
{
- boolean blobPathIsHadoop = blobPath.contains(AZURE_STORAGE_HOST_ADDRESS);
+ boolean blobPathIsHadoop = blobPath.contains(blobStorageEndpointSuffix);
if (blobPathIsHadoop) {
// Remove azure's hadoop prefix to match realtime ingestion path
return blobPath.substring(
- blobPath.indexOf(AZURE_STORAGE_HOST_ADDRESS) +
AZURE_STORAGE_HOST_ADDRESS.length() + 1);
+ blobPath.indexOf(blobStorageEndpointSuffix) +
blobStorageEndpointSuffix.length() + 1);
} else {
return blobPath;
}
diff --git
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java
index ffc4a8bb801..bbf07b402dd 100644
---
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java
+++
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java
@@ -133,6 +133,7 @@ public class AzureClientFactoryTest
EasyMock.expect(config.getKey()).andReturn("key").times(2);
EasyMock.expect(config.getAccount()).andReturn(ACCOUNT).times(2);
EasyMock.expect(config.getMaxTries()).andReturn(3);
+
EasyMock.expect(config.getBlobStorageEndpoint()).andReturn(AzureUtils.AZURE_STORAGE_HOST_ADDRESS);
azureClientFactory = new AzureClientFactory(config);
EasyMock.replay(config);
azureClientFactory.getBlobServiceClient(null);
diff --git
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java
index b3b67c4f13b..ac851877c53 100644
---
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java
+++
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java
@@ -60,13 +60,14 @@ public class AzureDataSegmentPullerTest extends
EasyMockSupport
final File toDir = FileUtils.createTempDir();
try {
final InputStream zipStream = new FileInputStream(pulledFile);
+ final AzureAccountConfig config = new AzureAccountConfig();
EasyMock.expect(byteSourceFactory.create(CONTAINER_NAME,
BLOB_PATH)).andReturn(new AzureByteSource(azureStorage, CONTAINER_NAME,
BLOB_PATH));
EasyMock.expect(azureStorage.getBlockBlobInputStream(0L, CONTAINER_NAME,
BLOB_PATH)).andReturn(zipStream);
replayAll();
- AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory);
+ AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory, config);
FileUtils.FileCopyResult result = puller.getSegmentFiles(CONTAINER_NAME,
BLOB_PATH, toDir);
@@ -92,13 +93,14 @@ public class AzureDataSegmentPullerTest extends
EasyMockSupport
final File toDir = FileUtils.createTempDir();
try {
final InputStream zipStream = new FileInputStream(pulledFile);
+ final AzureAccountConfig config = new AzureAccountConfig();
EasyMock.expect(byteSourceFactory.create(CONTAINER_NAME,
BLOB_PATH)).andReturn(new AzureByteSource(azureStorage, CONTAINER_NAME,
BLOB_PATH));
EasyMock.expect(azureStorage.getBlockBlobInputStream(0L, CONTAINER_NAME,
BLOB_PATH)).andReturn(zipStream);
replayAll();
- AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory);
+ AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory, config);
FileUtils.FileCopyResult result = puller.getSegmentFiles(CONTAINER_NAME,
BLOB_PATH_HADOOP, toDir);
@@ -119,6 +121,7 @@ public class AzureDataSegmentPullerTest extends
EasyMockSupport
public void
test_getSegmentFiles_nonRecoverableErrorRaisedWhenPullingSegmentFiles_doNotDeleteOutputDirectory()
throws IOException, BlobStorageException, SegmentLoadingException
{
+ final AzureAccountConfig config = new AzureAccountConfig();
final File outDir = FileUtils.createTempDir();
try {
@@ -131,7 +134,7 @@ public class AzureDataSegmentPullerTest extends
EasyMockSupport
replayAll();
- AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory);
+ AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory, config);
puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, outDir);
}
@@ -149,6 +152,7 @@ public class AzureDataSegmentPullerTest extends
EasyMockSupport
public void
test_getSegmentFiles_recoverableErrorRaisedWhenPullingSegmentFiles_deleteOutputDirectory()
throws IOException, BlobStorageException, SegmentLoadingException
{
+ final AzureAccountConfig config = new AzureAccountConfig();
final File outDir = FileUtils.createTempDir();
try {
@@ -163,7 +167,7 @@ public class AzureDataSegmentPullerTest extends
EasyMockSupport
EasyMock.replay(azureStorage);
EasyMock.replay(byteSourceFactory);
- AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory);
+ AzureDataSegmentPuller puller = new
AzureDataSegmentPuller(byteSourceFactory, config);
puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, outDir);
diff --git
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java
index 6d5efbae501..27d02cd2354 100644
---
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java
+++
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java
@@ -259,6 +259,26 @@ public class AzureStorageDruidModuleTest extends
EasyMockSupport
);
}
+ @Test
+ public void testGetBlobStorageEndpointWithDefaultProperties()
+ {
+ Properties properties = initializePropertes();
+ AzureAccountConfig config =
makeInjectorWithProperties(properties).getInstance(AzureAccountConfig.class);
+ Assert.assertEquals(config.getEndpointSuffix(),
AzureUtils.DEFAULT_AZURE_ENDPOINT_SUFFIX);
+ Assert.assertEquals(config.getBlobStorageEndpoint(),
AzureUtils.AZURE_STORAGE_HOST_ADDRESS);
+ }
+
+ @Test
+ public void testGetBlobStorageEndpointWithCustomBlobPath()
+ {
+ Properties properties = initializePropertes();
+ final String customSuffix = "core.usgovcloudapi.net";
+ properties.setProperty("druid.azure.endpointSuffix", customSuffix);
+ AzureAccountConfig config =
makeInjectorWithProperties(properties).getInstance(AzureAccountConfig.class);
+ Assert.assertEquals(config.getEndpointSuffix(), customSuffix);
+ Assert.assertEquals(config.getBlobStorageEndpoint(), "blob." +
customSuffix);
+ }
+
private Injector makeInjectorWithProperties(final Properties props)
{
return Guice.createInjector(
diff --git
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java
index 0222100c43e..4a28c4de4cc 100644
---
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java
+++
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java
@@ -85,14 +85,14 @@ public class AzureUtilsTest extends EasyMockSupport
@Test
public void
test_maybeRemoveAzurePathPrefix_pathHasLeadingAzurePathPrefix_returnsPathWithLeadingAzurePathRemoved()
{
- String path =
AzureUtils.maybeRemoveAzurePathPrefix(BLOB_PATH_WITH_LEADING_AZURE_PREFIX);
+ String path =
AzureUtils.maybeRemoveAzurePathPrefix(BLOB_PATH_WITH_LEADING_AZURE_PREFIX,
AzureUtils.AZURE_STORAGE_HOST_ADDRESS);
Assert.assertEquals(BLOB_NAME, path);
}
@Test
public void
test_maybeRemoveAzurePathPrefix_pathDoesNotHaveAzurePathPrefix__returnsPathWithLeadingAzurePathRemoved()
{
- String path = AzureUtils.maybeRemoveAzurePathPrefix(BLOB_NAME);
+ String path = AzureUtils.maybeRemoveAzurePathPrefix(BLOB_NAME,
AzureUtils.AZURE_STORAGE_HOST_ADDRESS);
Assert.assertEquals(BLOB_NAME, path);
}
@@ -206,4 +206,18 @@ public class AzureUtilsTest extends EasyMockSupport
boolean retry =
AzureUtils.AZURE_RETRY.apply(RUNTIME_EXCEPTION_WRAPPED_IN_RUNTIME_EXCEPTON);
Assert.assertFalse(retry);
}
+
+ @Test
+ public void testRemoveAzurePathPrefixDefaultEndpoint()
+ {
+ String outputBlob =
AzureUtils.maybeRemoveAzurePathPrefix("blob.core.windows.net/container/blob",
"blob.core.windows.net");
+ Assert.assertEquals("container/blob", outputBlob);
+ }
+
+ @Test
+ public void testRemoveAzurePathPrefixCustomEndpoint()
+ {
+ String outputBlob =
AzureUtils.maybeRemoveAzurePathPrefix("blob.core.usgovcloudapi.net/container/blob",
"blob.core.usgovcloudapi.net");
+ Assert.assertEquals("container/blob", outputBlob);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]