This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit cacae0531e03a83748a857aff680f72836c06d45
Author: preetham0202 <[email protected]>
AuthorDate: Thu Nov 20 19:12:17 2025 +0530

    [NO ISSUE] Fix parquet azure with dynamic prefix

    Ext-ref: MB-69466
    Change-Id: I81c33dae177fb59256eeb82e04aef4db5533ee37
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20593
    Tested-by: Hussain Towaileb <[email protected]>
    Integration-Tests: Hussain Towaileb <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
---
 .../asterix/external/util/ExternalDataConstants.java  |  2 ++
 .../asterix/external/util/ExternalDataUtils.java      | 19 +++++++++++++------
 .../asterix/external/util/azure/blob/BlobUtils.java   | 17 +++++++++++++++++
 .../external/util/azure/datalake/DatalakeUtils.java   | 17 +++++++++++++++++
 4 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 2846cf1342..0eb935efdc 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -153,7 +153,9 @@ public class ExternalDataConstants {
     public static final String KEY_ADAPTER_NAME_HTTP = "http_adapter";
     public static final String KEY_ADAPTER_NAME_AWS_S3 = "S3";
     public static final String KEY_ADAPTER_NAME_AZURE_BLOB = "AZUREBLOB";
+    public static final String KEY_ADAPTER_NAME_AZURE_BLOB_ALIAS = "AZURE_BLOB";
     public static final String KEY_ADAPTER_NAME_AZURE_DATA_LAKE = "AZUREDATALAKE";
+    public static final String KEY_ADAPTER_NAME_AZURE_DATA_LAKE_ALIAS = "AZURE_DATALAKE";
     public static final String KEY_ADAPTER_NAME_GCS = "GCS";
     public static final String KEY_ADAPTER_NAME_HDFS = "HDFS";
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index de31531de4..83c3a8bf90 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -87,6 +87,8 @@ import org.apache.asterix.external.util.aws.AwsConstants;
 import org.apache.asterix.external.util.aws.s3.S3Constants;
 import org.apache.asterix.external.util.aws.s3.S3Utils;
 import org.apache.asterix.external.util.azure.AzureConstants;
+import org.apache.asterix.external.util.azure.blob.BlobUtils;
+import org.apache.asterix.external.util.azure.datalake.DatalakeUtils;
 import org.apache.asterix.external.util.google.GCSConstants;
 import org.apache.asterix.external.util.google.GCSUtils;
 import org.apache.asterix.om.types.ARecordType;
@@ -1137,26 +1139,32 @@ public class ExternalDataUtils {
         return configuration.getOrDefault(DEFINITION_FIELD_NAME, configuration.get(KEY_PATH));
     }
 
-    public static String getProtocolContainerPair(Map<String, String> configurations) {
+    public static String getProtocolContainerPair(Map<String, String> configurations) throws CompilationException {
         String container = configurations.getOrDefault(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME, "");
         String type = configurations.getOrDefault(ExternalDataConstants.KEY_EXTERNAL_SOURCE_TYPE, "");
         String protocol;
         switch (type) {
             case ExternalDataConstants.KEY_ADAPTER_NAME_AWS_S3:
                 protocol = S3Constants.HADOOP_S3_PROTOCOL;
-                break;
+                return protocol + "://" + container + "/";
             case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB:
+            case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB_ALIAS:
+                protocol = AzureConstants.HADOOP_AZURE_PROTOCOL;
+                String blobEndpoint = BlobUtils.getEndpointFromClient(configurations);
+                return protocol + "://" + container + "@" + blobEndpoint + "/";
             case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE:
+            case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE_ALIAS:
                 protocol = AzureConstants.HADOOP_AZURE_PROTOCOL;
-                break;
+                String dataLakeEndpoint = DatalakeUtils.getEndpointFromClient(configurations);
+                return protocol + "://" + container + "@" + dataLakeEndpoint + "/";
             case ExternalDataConstants.KEY_ADAPTER_NAME_GCS:
                 protocol = GCSConstants.HADOOP_GCS_PROTOCOL;
-                break;
+                return protocol + "://" + container + "/";
             case ExternalDataConstants.KEY_ADAPTER_NAME_LOCALFS:
                 String path = getDefinitionOrPath(configurations);
                 String[] nodePathPair = path.trim().split("://");
                 protocol = nodePathPair[0];
-                break;
+                return protocol + "://" + container + "/";
             case ExternalDataConstants.KEY_ADAPTER_NAME_HDFS:
                 // Remove trailing slashes as prefixes/paths in hdfs start with a slash (absolute paths)
                 return configurations.get(ExternalDataConstants.KEY_HDFS_URL).replaceAll("/+$", "");
@@ -1164,7 +1172,6 @@ public class ExternalDataUtils {
                 return "";
         }
 
-        return protocol + "://" + container + "/";
     }
 
     public static void validateType(Map<String, String> properties, ARecordType itemType) throws CompilationException {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
index 48601883b9..a695320511 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
@@ -52,6 +52,7 @@ import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataPrefix;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.asterix.external.util.azure.AzureConstants;
+import org.apache.asterix.external.util.azure.AzureUtils;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
@@ -344,4 +345,20 @@ public class BlobUtils {
             throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex));
         }
     }
+
+    public static String getEndpointFromClient(Map<String, String> configuration) throws CompilationException {
+        String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
+        if (endpoint == null) {
+            throw new CompilationException(PARAMETERS_REQUIRED, ENDPOINT_FIELD_NAME);
+        }
+
+        BlobServiceClientBuilder builder = new BlobServiceClientBuilder();
+        try {
+            builder.endpoint(endpoint);
+        } catch (Exception ex) {
+            throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex));
+        }
+        return AzureUtils.extractEndPoint(builder.buildClient().getAccountUrl());
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
index 475ede3b95..395cb0f43f 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
@@ -54,6 +54,7 @@ import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataPrefix;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.asterix.external.util.azure.AzureConstants;
+import org.apache.asterix.external.util.azure.AzureUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
 import org.apache.hyracks.api.exceptions.SourceLocation;
@@ -315,4 +316,20 @@ public class DatalakeUtils {
             throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex));
         }
     }
+
+    public static String getEndpointFromClient(Map<String, String> configuration) throws CompilationException {
+        String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
+        if (endpoint == null) {
+            throw new CompilationException(PARAMETERS_REQUIRED, ENDPOINT_FIELD_NAME);
+        }
+
+        DataLakeServiceClientBuilder builder = new DataLakeServiceClientBuilder();
+        try {
+            builder.endpoint(endpoint);
+        } catch (Exception ex) {
+            throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex));
+        }
+        return AzureUtils.extractEndPoint(builder.buildClient().getAccountUrl());
+    }
+
 }
