>From Hussain Towaileb <[email protected]>: Hussain Towaileb has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17715 )
Change subject: [ASTERIXDB-3243][EXT]: Issue warning and skip file on computed field type mismatch ...................................................................... [ASTERIXDB-3243][EXT]: Issue warning and skip file on computed field type mismatch Change-Id: I3208bcd8b59a0ca6351053bc3ce06e76ecb1d462 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17715 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> Reviewed-by: Wail Alkowaileet <[email protected]> --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp M asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java A asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java M asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties 11 files changed, 168 insertions(+), 24 deletions(-) Approvals: Wail Alkowaileet: Looks good to me, approved Hussain Towaileb: Looks good to me, but someone else must approve Jenkins: Verified; Verified Objections: Anon. E. Moose #1000171: Violations found diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java index efa4c79..7b00240 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java @@ -49,7 +49,7 @@ assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty()); String prefix1 = ""; - prefix = new ExternalDataPrefix(prefix1); + prefix = new ExternalDataPrefix(prefix1, null); assertEquals("", prefix.getOriginal()); assertEquals("", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); @@ -60,7 +60,7 @@ assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty()); String prefix2 = "hotel"; - prefix = new ExternalDataPrefix(prefix2); + prefix = new ExternalDataPrefix(prefix2, null); assertEquals("hotel", prefix.getOriginal()); assertEquals("hotel", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); @@ -71,7 +71,7 @@ assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty()); String prefix3 = "hotel/{hotel-id:inT}/"; - prefix = new ExternalDataPrefix(prefix3); + prefix = new ExternalDataPrefix(prefix3, null); assertEquals("hotel/{hotel-id:inT}/", prefix.getOriginal()); assertEquals("hotel/", prefix.getRoot()); assertTrue(prefix.isEndsWithSlash()); @@ -82,7 +82,7 @@ assertEquals("(.+)", prefix.getIndexToComputedFieldsMap().get(1).getExpression()); String prefix4 = "hotel/{hotel-id:int}-{hotel-name:sTRing}"; - prefix = new ExternalDataPrefix(prefix4); + prefix = new ExternalDataPrefix(prefix4, null); assertEquals("hotel/{hotel-id:int}-{hotel-name:sTRing}", prefix.getOriginal()); assertEquals("hotel", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); @@ -93,7 +93,7 @@ assertEquals("(.+)-(.+)", prefix.getIndexToComputedFieldsMap().get(1).getExpression()); String prefix5 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/"; - prefix = new ExternalDataPrefix(prefix5); + prefix = new ExternalDataPrefix(prefix5, null); assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/", prefix.getOriginal()); assertEquals("hotel/something/", prefix.getRoot()); @@ -107,7 +107,7 @@ assertEquals("(.+)-(.+)-(.+)", prefix.getIndexToComputedFieldsMap().get(4).getExpression()); String prefix6 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}"; - prefix = new ExternalDataPrefix(prefix6); + prefix = new ExternalDataPrefix(prefix6, null); assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}", prefix.getOriginal()); assertEquals("hotel/something", prefix.getRoot()); @@ -123,7 +123,7 @@ assertEquals("(.+)", prefix.getIndexToComputedFieldsMap().get(6).getExpression()); String prefix7 = "hotel/{hotel.details.id:int}-{hotel-name:sTRing}"; - prefix = new ExternalDataPrefix(prefix7); + prefix = new ExternalDataPrefix(prefix7, null); assertEquals("hotel/{hotel.details.id:int}-{hotel-name:sTRing}", prefix.getOriginal()); assertEquals("hotel", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); @@ -134,7 +134,7 @@ String prefix8 = "hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day"; - prefix = new ExternalDataPrefix(prefix8); + prefix = new ExternalDataPrefix(prefix8, null); assertEquals( "hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day", prefix.getOriginal()); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp new file mode 100644 index 0000000..7b70bd3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; +USE test; + +CREATE TYPE test AS { +}; + + +CREATE EXTERNAL DATASET test(test) USING S3 ( + ("accessKeyId"="dummyAccessKey"), + ("secretAccessKey"="dummySecretKey"), + ("region"="us-west-2"), + ("serviceEndpoint"="http://127.0.0.1:8001"), + ("container"="playground"), + ("definition"="external-filter/{name:bigint}"), + ("format"="json") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp new file mode 100644 index 0000000..13b6abe --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// param max-warnings:json=10 + +USE test; + +SELECT value count(*) +FROM test t +WHERE t.name = "department"; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp new file mode 100644 index 0000000..36b2bab --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm new file mode 100644 index 0000000..c227083 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 4414ebf..e50ec3c 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -226,6 +226,17 @@ <output-dir compare="Text">one-field</output-dir> </compilation-unit> </test-case> + <test-case FilePath="external-dataset/s3/filter" check-warnings="true"> + <compilation-unit name="type-mismatch"> + <output-dir compare="Text">type-mismatch</output-dir> + <expected-warn>Failed to evaluate computed field. File: 'external-filter/department/accounting/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 'java.lang.NumberFormatException: For input string: "department"'</expected-warn> + <expected-warn>Failed to evaluate computed field. File: 'external-filter/department/engineering/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 'java.lang.NumberFormatException: For input string: "department"'</expected-warn> + <expected-warn>Failed to evaluate computed field. File: 'external-filter/department/hr/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 'java.lang.NumberFormatException: For input string: "department"'</expected-warn> + <expected-warn>Failed to evaluate computed field. File: 'external-filter/last-name/Jones/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn> + <expected-warn>Failed to evaluate computed field. File: 'external-filter/last-name/miller/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn> + <expected-warn>Failed to evaluate computed field. File: 'external-filter/last-name/smith/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn> + </compilation-unit> + </test-case> <test-case FilePath="external-dataset"> <compilation-unit name="common/empty-string-definition"> <placeholder name="adapter" value="S3" /> diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java index ab2a978..3ac665c 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java @@ -277,6 +277,7 @@ ERROR_READING_ICEBERG_METADATA(1180), UNSUPPORTED_COMPUTED_FIELD_TYPE(1181), FAILED_TO_CALCULATE_COMPUTED_FIELDS(1182), + FAILED_TO_EVALUATE_COMPUTED_FIELD(1183), // Feed errors DATAFLOW_ILLEGAL_STATE(3001), diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties index b51b09a..3c9543a 100644 --- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties +++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties @@ -279,6 +279,7 @@ 1180 = Error reading iceberg data 1181 = Unsupported computed field type: %1$s 1182 = Failed to calculate computed fields: %1$s +1183 = Failed to evaluate computed field. File: '%1$s'. Computed Field Name: '%2$s'. Computed Field Type: '%3$s'. Computed Field Value: '%4$s'. Reason: '%5$s' # Feed Errors 3001 = Illegal state. diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java index 6e3556c..a07e067 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java @@ -61,16 +61,21 @@ private void writeValue(ATypeTag typeTag, String stringValue) throws HyracksDataException { DataOutput output = value.getDataOutput(); SerializerDeserializerUtil.serializeTag(typeTag, output); - switch (typeTag) { - case TINYINT: - case SMALLINT: - case INTEGER: - case BIGINT: - Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output); - case DOUBLE: - DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output); - case STRING: - stringSerDer.serialize(stringValue, output); + + try { + switch (typeTag) { + case TINYINT: + case SMALLINT: + case INTEGER: + case BIGINT: + Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output); + case DOUBLE: + DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output); + case STRING: + stringSerDer.serialize(stringValue, output); + } + } catch (Exception ex) { + throw HyracksDataException.create(ex); } } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java index 9fa2b5a..7ae992a 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java @@ -58,7 +58,7 @@ IncludeExcludeMatcher includeExcludeMatcher = ExternalDataUtils.getIncludeExcludeMatchers(configuration); //Get a list of S3 objects - ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(configuration); + ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(configuration, warningCollector); configuration.put(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME, externalDataPrefix.getRoot()); // TODO(htowaileb): Since we're using the root to load the files then start filtering, it might end up being diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java index 2a707fb..66fa445 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java @@ -46,6 +46,10 @@ import org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.api.exceptions.IWarningCollector; +import org.apache.hyracks.api.exceptions.NoOpWarningCollector; +import org.apache.hyracks.api.exceptions.Warning; +import org.apache.hyracks.util.LogRedactionUtil; public class ExternalDataPrefix { @@ -53,6 +57,7 @@ private String root; private final boolean endsWithSlash; private final List<String> segments; + private final IWarningCollector warningCollector; private final List<String> computedFieldNames = new ArrayList<>(); private final List<IAType> computedFieldTypes = new ArrayList<>(); @@ -66,13 +71,20 @@ static { supportedTypes.add(ATypeTag.STRING); supportedTypes.add(ATypeTag.BIGINT); + supportedTypes.add(ATypeTag.DOUBLE); } public ExternalDataPrefix(Map<String, String> configuration) throws AlgebricksException { - this(getDefinitionOrPath(configuration)); + this(getDefinitionOrPath(configuration), null); } - public ExternalDataPrefix(String prefix) throws AlgebricksException { + public ExternalDataPrefix(Map<String, String> configuration, IWarningCollector warningCollector) + throws AlgebricksException { + this(getDefinitionOrPath(configuration), warningCollector); + } + + public ExternalDataPrefix(String prefix, IWarningCollector warningCollector) throws AlgebricksException { + this.warningCollector = warningCollector != null ? warningCollector : NoOpWarningCollector.INSTANCE; this.original = prefix != null ? prefix : ""; this.endsWithSlash = this.original.endsWith("/"); @@ -251,10 +263,27 @@ // extract values for all compute fields and set them in the evaluator // TODO provide the List to avoid array creation List<String> values = extractValues(keySegments); - for (int i = 0; i < computedFieldNames.size(); i++) { - if (evaluator.isComputedFieldUsed(i)) { - evaluator.setValue(i, values.get(i)); + + String computedFieldName = null; + IAType computedFieldType = null; + String computedFieldValue = null; + try { + for (int i = 0; i < computedFieldNames.size(); i++) { + computedFieldName = computedFieldNames.get(i); + computedFieldType = computedFieldTypes.get(i); + computedFieldValue = values.get(i); + + if (evaluator.isComputedFieldUsed(i)) { + evaluator.setValue(i, computedFieldValue); + } } + } catch (HyracksDataException ex) { + if (warningCollector.shouldWarn()) { + warningCollector.warn(Warning.of(null, ErrorCode.FAILED_TO_EVALUATE_COMPUTED_FIELD, + LogRedactionUtil.userData(key), computedFieldName, computedFieldType, + LogRedactionUtil.userData(computedFieldValue), LogRedactionUtil.userData(ex.getMessage()))); + } + return false; } return evaluator.evaluate(); -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17715 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I3208bcd8b59a0ca6351053bc3ce06e76ecb1d462 Gerrit-Change-Number: 17715 Gerrit-PatchSet: 5 Gerrit-Owner: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Wail Alkowaileet <[email protected]> Gerrit-MessageType: merged
