From Wael Alkowaileet <wael....@gmail.com>: Wael Alkowaileet has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12905 )
Change subject: [ASTERIXDB-2918][EXT] Validate the type when creating Parquet external dataset ...................................................................... [ASTERIXDB-2918][EXT] Validate the type when creating Parquet external dataset - user model changes: no - storage format changes: no - interface changes: no Details: Ensure the used type - when creating an external dataset using Parquet format - does not contain declared fields. Change-Id: I4870a91ecf41b41996b862704b767e04abc14569 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12905 Reviewed-by: Hussain Towaileb <hussai...@gmail.com> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> --- M asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java M asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java 6 files changed, 71 insertions(+), 1 deletion(-) Approvals: Hussain Towaileb: Looks good to me, approved Jenkins: Verified; Verified diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java index 3e0cf79..bf6e3b5 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java @@ -967,7 +967,9 @@ Datatype itemType, MetadataProvider metadataProvider, MetadataTransactionContext mdTxnCtx) 
throws AlgebricksException { ExternalDetailsDecl externalDetails = (ExternalDetailsDecl) dd.getDatasetDetailsDecl(); - return externalDetails.getProperties(); + Map<String, String> properties = externalDetails.getProperties(); + ExternalDataUtils.validateType(properties, (ARecordType) itemType.getDatatype()); + return properties; } protected static void validateIfResourceIsActiveInFeed(ICcApplicationContext appCtx, Dataset dataset, diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp new file mode 100644 index 0000000..ad6cd0e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* +* Description : Test type validation for Parquet +* Expected Res : ASX1161: Type 'ParquetType' contains declared fields, which is not supported for 'parquet' format +* Date : August 19th 2021 +*/ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + +CREATE TYPE ParquetType as { + id: string, + text: string +}; + +CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter% +( + %template%, + ("container"="playground"), + ("definition"="parquet-data/reviews"), + ("include"="*id_age.parquet"), + ("format" = "parquet") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 41c769d..c461722 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -129,6 +129,13 @@ <output-dir compare="Text">parquet-temporary-access</output-dir> </compilation-unit> </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="common/parquet/invalid-type"> + <placeholder name="adapter" value="S3" /> + <output-dir compare="Text">none</output-dir> + <expected-error>ASX1161: Type 'ParquetType' contains declared fields, which is not supported for 'parquet' format</expected-error> + </compilation-unit> + </test-case> <!-- Parquet Tests End --> <test-case FilePath="external-dataset"> <compilation-unit name="common/empty-string-definition"> diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java index 9cfd6ea..79d663e 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java +++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java @@ -245,6 +245,7 @@ COMPILATION_BAD_VIEW_DEFINITION(1158), UNKNOWN_VIEW(1159), VIEW_EXISTS(1160), + UNSUPPORTED_TYPE_FOR_PARQUET(1161), // Feed errors DATAFLOW_ILLEGAL_STATE(3001), diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties index 5f08844..159e0ef 100644 --- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties +++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties @@ -247,6 +247,7 @@ 1158 = Error compiling view %1$s. %2$s 1159 = Cannot find view with name %1$s 1160 = A view with this name %1$s already exists +1161 = Type '%1$s' contains declared fields, which is not supported for 'parquet' format # Feed Errors 3001 = Illegal state. diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java index 36ee203..112e4ee 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java @@ -771,6 +771,23 @@ public static boolean supportsPushdown(Map<String, String> properties) { //Currently, only Apache Parquet format is supported + return isParquetFormat(properties); + } + + /** + * Validate the dataset type declared with a given type + * + * @param properties external dataset configuration + * @param datasetRecordType dataset declared type + */ + public static void validateType(Map<String, String> properties, ARecordType datasetRecordType) + throws CompilationException { + if (isParquetFormat(properties) && datasetRecordType.getFieldTypes().length != 0) { + throw new 
CompilationException(ErrorCode.UNSUPPORTED_TYPE_FOR_PARQUET, datasetRecordType.getTypeName()); + } + } + + private static boolean isParquetFormat(Map<String, String> properties) { String inputFormat = properties.get(ExternalDataConstants.KEY_INPUT_FORMAT); return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT.equals(inputFormat) || ExternalDataConstants.INPUT_FORMAT_PARQUET.equals(inputFormat) -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12905 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I4870a91ecf41b41996b862704b767e04abc14569 Gerrit-Change-Number: 12905 Gerrit-PatchSet: 3 Gerrit-Owner: Wael Alkowaileet <wael....@gmail.com> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Hussain Towaileb <hussai...@gmail.com> Gerrit-Reviewer: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Gerrit-Reviewer: Wael Alkowaileet <wael....@gmail.com> Gerrit-MessageType: merged