From Hussain Towaileb <[email protected]>: Hussain Towaileb has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12983 )
Change subject: [NO ISSUE]: Google cloud storage: verify authentication before creating external dataset ...................................................................... [NO ISSUE]: Google cloud storage: verify authentication before creating external dataset Change-Id: Id2a9c6ea70bb5ffe61c486b0b916926a50beaf67 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12983 Reviewed-by: Hussain Towaileb <[email protected]> Reviewed-by: Wael Alkowaileet <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> --- M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStreamFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java 2 files changed, 44 insertions(+), 1 deletion(-) Approvals: Wael Alkowaileet: Looks good to me, approved Hussain Towaileb: Looks good to me, but someone else must approve Jenkins: Verified; Verified Objections: Anon. E. 
Moose #1000171: Violations found diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStreamFactory.java index 343ca93..cde9fe2 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStreamFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/gcs/GCSInputStreamFactory.java @@ -67,10 +67,11 @@ List<Blob> filesOnly = new ArrayList<>(); String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME); Storage gcs = ExternalDataUtils.GCS.buildClient(configuration); + Storage.BlobListOption options = Storage.BlobListOption.prefix(ExternalDataUtils.getPrefix(configuration)); Page<Blob> items; try { - items = gcs.list(container); + items = gcs.list(container, options); } catch (BaseServiceException ex) { throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex.getMessage()); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java index 4c76823..dacf464 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java @@ -18,6 +18,7 @@ */ package org.apache.asterix.external.util; +import static com.google.cloud.storage.Storage.BlobListOption; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.asterix.common.exceptions.ErrorCode.EXTERNAL_SOURCE_ERROR; import static org.apache.asterix.common.exceptions.ErrorCode.PARAMETERS_NOT_ALLOWED_AT_SAME_TIME; @@ -40,7 +41,9 @@ import static 
org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.CLIENT_SECRET_FIELD_NAME; import static org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.CONNECTION_STRING_FIELD_NAME; import static org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.TENANT_ID_FIELD_NAME; +import static org.apache.asterix.external.util.ExternalDataConstants.CONTAINER_NAME_FIELD_NAME; import static org.apache.asterix.external.util.ExternalDataConstants.GCS.JSON_CREDENTIALS_FIELD_NAME; +import static org.apache.asterix.external.util.ExternalDataConstants.KEY_ADAPTER_NAME_GCS; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_DELIMITER; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_ESCAPE; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_EXCLUDE; @@ -112,7 +115,9 @@ import com.azure.storage.blob.BlobServiceClientBuilder; import com.azure.storage.blob.models.BlobItem; import com.azure.storage.blob.models.ListBlobsOptions; +import com.google.api.gax.paging.Page; import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; @@ -573,6 +578,9 @@ case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB: Azure.validateProperties(configuration, srcLoc, collector); break; + case KEY_ADAPTER_NAME_GCS: + GCS.validateProperties(configuration, srcLoc, collector); + break; default: // Nothing needs to be done break; @@ -1339,6 +1347,40 @@ return builder.build().getService(); } + + /** + * Validate external dataset properties + * + * @param configuration properties + * @throws CompilationException Compilation exception + */ + public static void validateProperties(Map<String, String> configuration, SourceLocation srcLoc, + IWarningCollector collector) throws CompilationException { + + // check if the format property is present + if 
(configuration.get(ExternalDataConstants.KEY_FORMAT) == null) { + throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED, srcLoc, ExternalDataConstants.KEY_FORMAT); + } + + validateIncludeExclude(configuration); + String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME); + + try { + BlobListOption limitOption = BlobListOption.pageSize(1); + BlobListOption prefixOption = BlobListOption.prefix(getPrefix(configuration)); + Storage storage = buildClient(configuration); + Page<Blob> items = storage.list(container, limitOption, prefixOption); + + if (!items.iterateAll().iterator().hasNext() && collector.shouldWarn()) { + Warning warning = Warning.of(srcLoc, ErrorCode.EXTERNAL_SOURCE_CONFIGURATION_RETURNED_NO_FILES); + collector.warn(warning); + } + } catch (CompilationException ex) { + throw ex; + } catch (Exception ex) { + throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex.getMessage()); + } + } } public static int roundUpToNearestFrameSize(int size, int framesize) { -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12983 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: Id2a9c6ea70bb5ffe61c486b0b916926a50beaf67 Gerrit-Change-Number: 12983 Gerrit-PatchSet: 2 Gerrit-Owner: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Till Westmann <[email protected]> Gerrit-Reviewer: Wael Alkowaileet <[email protected]> Gerrit-MessageType: merged
