From Preetham Poluparthi <[email protected]>:
Preetham Poluparthi has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20593?usp=email )
Change subject: [NO ISSUE] Fix parquet azure with dynamic prefix
......................................................................
[NO ISSUE] Fix parquet azure with dynamic prefix
Ext-ref: MB-69466
Change-Id: I81c33dae177fb59256eeb82e04aef4db5533ee37
---
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
4 files changed, 42 insertions(+), 7 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/93/20593/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 2846cf1..0eb935e 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -153,7 +153,9 @@
public static final String KEY_ADAPTER_NAME_HTTP = "http_adapter";
public static final String KEY_ADAPTER_NAME_AWS_S3 = "S3";
public static final String KEY_ADAPTER_NAME_AZURE_BLOB = "AZUREBLOB";
+ public static final String KEY_ADAPTER_NAME_AZURE_BLOB_ALIAS =
"AZURE_BLOB";
public static final String KEY_ADAPTER_NAME_AZURE_DATA_LAKE =
"AZUREDATALAKE";
+ public static final String KEY_ADAPTER_NAME_AZURE_DATA_LAKE_ALIAS =
"AZURE_DATALAKE";
public static final String KEY_ADAPTER_NAME_GCS = "GCS";
public static final String KEY_ADAPTER_NAME_HDFS = "HDFS";
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index de31531..83c3a8b 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -87,6 +87,8 @@
import org.apache.asterix.external.util.aws.s3.S3Constants;
import org.apache.asterix.external.util.aws.s3.S3Utils;
import org.apache.asterix.external.util.azure.AzureConstants;
+import org.apache.asterix.external.util.azure.blob.BlobUtils;
+import org.apache.asterix.external.util.azure.datalake.DatalakeUtils;
import org.apache.asterix.external.util.google.GCSConstants;
import org.apache.asterix.external.util.google.GCSUtils;
import org.apache.asterix.om.types.ARecordType;
@@ -1137,26 +1139,32 @@
return configuration.getOrDefault(DEFINITION_FIELD_NAME,
configuration.get(KEY_PATH));
}
- public static String getProtocolContainerPair(Map<String, String>
configurations) {
+ public static String getProtocolContainerPair(Map<String, String>
configurations) throws CompilationException {
String container =
configurations.getOrDefault(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME,
"");
String type =
configurations.getOrDefault(ExternalDataConstants.KEY_EXTERNAL_SOURCE_TYPE, "");
String protocol;
switch (type) {
case ExternalDataConstants.KEY_ADAPTER_NAME_AWS_S3:
protocol = S3Constants.HADOOP_S3_PROTOCOL;
- break;
+ return protocol + "://" + container + "/";
case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB:
- case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE:
+ case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB_ALIAS:
protocol = AzureConstants.HADOOP_AZURE_PROTOCOL;
- break;
+ String blobEndpoint =
BlobUtils.getEndpointFromClient(configurations);
+ return protocol + "://" + container + "@" + blobEndpoint + "/";
+ case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE:
+ case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE_ALIAS:
+ protocol = AzureConstants.HADOOP_AZURE_PROTOCOL;
+ String dataLakeEndpoint =
DatalakeUtils.getEndpointFromClient(configurations);
+ return protocol + "://" + container + "@" + dataLakeEndpoint +
"/";
case ExternalDataConstants.KEY_ADAPTER_NAME_GCS:
protocol = GCSConstants.HADOOP_GCS_PROTOCOL;
- break;
+ return protocol + "://" + container + "/";
case ExternalDataConstants.KEY_ADAPTER_NAME_LOCALFS:
String path = getDefinitionOrPath(configurations);
String[] nodePathPair = path.trim().split("://");
protocol = nodePathPair[0];
- break;
+ return protocol + "://" + container + "/";
case ExternalDataConstants.KEY_ADAPTER_NAME_HDFS:
// Remove trailing slashes as prefixes/paths in hdfs start
with a slash (absolute paths)
return
configurations.get(ExternalDataConstants.KEY_HDFS_URL).replaceAll("/+$", "");
@@ -1164,7 +1172,6 @@
return "";
}
- return protocol + "://" + container + "/";
}
public static void validateType(Map<String, String> properties,
ARecordType itemType) throws CompilationException {
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
index 4860188..0c33f08 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java
@@ -52,6 +52,7 @@
import org.apache.asterix.external.util.ExternalDataPrefix;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.external.util.azure.AzureConstants;
+import org.apache.asterix.external.util.azure.AzureUtils;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
@@ -344,4 +345,16 @@
throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR,
ex, getMessageOrToString(ex));
}
}
+
+ public static String getEndpointFromClient(Map<String, String>
configuration) throws CompilationException {
+ String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
+ if (endpoint == null) {
+ throw new CompilationException(PARAMETERS_REQUIRED,
ENDPOINT_FIELD_NAME);
+ }
+
+ BlobServiceClientBuilder builder = new BlobServiceClientBuilder();
+ builder.endpoint(endpoint);
+ return
AzureUtils.extractEndPoint(builder.buildClient().getAccountUrl());
+ }
+
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
index 475ede3..212a677 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java
@@ -54,6 +54,7 @@
import org.apache.asterix.external.util.ExternalDataPrefix;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.external.util.azure.AzureConstants;
+import org.apache.asterix.external.util.azure.AzureUtils;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
import org.apache.hyracks.api.exceptions.SourceLocation;
@@ -315,4 +316,16 @@
throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR,
ex, getMessageOrToString(ex));
}
}
+
+ public static String getEndpointFromClient(Map<String, String>
configuration) throws CompilationException {
+ String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
+ if (endpoint == null) {
+ throw new CompilationException(PARAMETERS_REQUIRED,
ENDPOINT_FIELD_NAME);
+ }
+
+ DataLakeServiceClientBuilder builder = new
DataLakeServiceClientBuilder();
+ builder.endpoint(endpoint);
+ return
AzureUtils.extractEndPoint(builder.buildClient().getAccountUrl());
+ }
+
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20593?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: I81c33dae177fb59256eeb82e04aef4db5533ee37
Gerrit-Change-Number: 20593
Gerrit-PatchSet: 1
Gerrit-Owner: Preetham Poluparthi <[email protected]>