This is an automated email from the ASF dual-hosted git repository.
htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 6b532e5876 [ASTERIXDB-3243][EXT]: Issue warning and skip file on
computed field type mismatch
6b532e5876 is described below
commit 6b532e5876b7198ee6c37cb5d9b1ccdc3e35e1e5
Author: Hussain Towaileb <[email protected]>
AuthorDate: Thu Aug 10 07:07:40 2023 +0300
[ASTERIXDB-3243][EXT]: Issue warning and skip file on computed field type
mismatch
Change-Id: I3208bcd8b59a0ca6351053bc3ce06e76ecb1d462
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17715
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Hussain Towaileb <[email protected]>
Reviewed-by: Wail Alkowaileet <[email protected]>
---
.../external_dataset/PrefixComputedFieldsTest.java | 16 ++++-----
.../s3/filter/type-mismatch/test.000.ddl.sqlpp | 36 ++++++++++++++++++++
.../s3/filter/type-mismatch/test.010.query.sqlpp | 26 +++++++++++++++
.../s3/filter/type-mismatch/test.999.ddl.sqlpp | 20 +++++++++++
.../s3/filter/type-mismatch/result.010.adm | 1 +
.../runtimets/testsuite_external_dataset_s3.xml | 11 ++++++
.../asterix/common/exceptions/ErrorCode.java | 1 +
.../src/main/resources/asx_errormsg/en.properties | 1 +
.../input/filter/ExternalFilterValueEvaluator.java | 25 ++++++++------
.../record/reader/aws/AwsS3InputStreamFactory.java | 2 +-
.../asterix/external/util/ExternalDataPrefix.java | 39 +++++++++++++++++++---
11 files changed, 154 insertions(+), 24 deletions(-)
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
index efa4c79612..7b002400ba 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
@@ -49,7 +49,7 @@ public class PrefixComputedFieldsTest extends TestCase {
assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());
String prefix1 = "";
- prefix = new ExternalDataPrefix(prefix1);
+ prefix = new ExternalDataPrefix(prefix1, null);
assertEquals("", prefix.getOriginal());
assertEquals("", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -60,7 +60,7 @@ public class PrefixComputedFieldsTest extends TestCase {
assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());
String prefix2 = "hotel";
- prefix = new ExternalDataPrefix(prefix2);
+ prefix = new ExternalDataPrefix(prefix2, null);
assertEquals("hotel", prefix.getOriginal());
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -71,7 +71,7 @@ public class PrefixComputedFieldsTest extends TestCase {
assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());
String prefix3 = "hotel/{hotel-id:inT}/";
- prefix = new ExternalDataPrefix(prefix3);
+ prefix = new ExternalDataPrefix(prefix3, null);
assertEquals("hotel/{hotel-id:inT}/", prefix.getOriginal());
assertEquals("hotel/", prefix.getRoot());
assertTrue(prefix.isEndsWithSlash());
@@ -82,7 +82,7 @@ public class PrefixComputedFieldsTest extends TestCase {
assertEquals("(.+)",
prefix.getIndexToComputedFieldsMap().get(1).getExpression());
String prefix4 = "hotel/{hotel-id:int}-{hotel-name:sTRing}";
- prefix = new ExternalDataPrefix(prefix4);
+ prefix = new ExternalDataPrefix(prefix4, null);
assertEquals("hotel/{hotel-id:int}-{hotel-name:sTRing}",
prefix.getOriginal());
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -93,7 +93,7 @@ public class PrefixComputedFieldsTest extends TestCase {
assertEquals("(.+)-(.+)",
prefix.getIndexToComputedFieldsMap().get(1).getExpression());
String prefix5 =
"hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/";
- prefix = new ExternalDataPrefix(prefix5);
+ prefix = new ExternalDataPrefix(prefix5, null);
assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/",
prefix.getOriginal());
assertEquals("hotel/something/", prefix.getRoot());
@@ -107,7 +107,7 @@ public class PrefixComputedFieldsTest extends TestCase {
assertEquals("(.+)-(.+)-(.+)",
prefix.getIndexToComputedFieldsMap().get(4).getExpression());
String prefix6 =
"hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}";
- prefix = new ExternalDataPrefix(prefix6);
+ prefix = new ExternalDataPrefix(prefix6, null);
assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}",
prefix.getOriginal());
assertEquals("hotel/something", prefix.getRoot());
@@ -123,7 +123,7 @@ public class PrefixComputedFieldsTest extends TestCase {
assertEquals("(.+)",
prefix.getIndexToComputedFieldsMap().get(6).getExpression());
String prefix7 = "hotel/{hotel.details.id:int}-{hotel-name:sTRing}";
- prefix = new ExternalDataPrefix(prefix7);
+ prefix = new ExternalDataPrefix(prefix7, null);
assertEquals("hotel/{hotel.details.id:int}-{hotel-name:sTRing}",
prefix.getOriginal());
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -134,7 +134,7 @@ public class PrefixComputedFieldsTest extends TestCase {
String prefix8 =
"hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day";
- prefix = new ExternalDataPrefix(prefix8);
+ prefix = new ExternalDataPrefix(prefix8, null);
assertEquals(
"hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day",
prefix.getOriginal());
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
new file mode 100644
index 0000000000..7b70bd37d2
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE test AS {
+};
+
+
+CREATE EXTERNAL DATASET test(test) USING S3 (
+ ("accessKeyId"="dummyAccessKey"),
+ ("secretAccessKey"="dummySecretKey"),
+ ("region"="us-west-2"),
+ ("serviceEndpoint"="http://127.0.0.1:8001"),
+ ("container"="playground"),
+ ("definition"="external-filter/{name:bigint}"),
+ ("format"="json")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
new file mode 100644
index 0000000000..13b6abe99e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// param max-warnings:json=10
+
+USE test;
+
+SELECT value count(*)
+FROM test t
+WHERE t.name = "department";
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp
new file mode 100644
index 0000000000..36b2bab543
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
new file mode 100644
index 0000000000..c227083464
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 4414ebfc4e..e50ec3cc8e 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -226,6 +226,17 @@
<output-dir compare="Text">one-field</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="external-dataset/s3/filter" check-warnings="true">
+ <compilation-unit name="type-mismatch">
+ <output-dir compare="Text">type-mismatch</output-dir>
+ <expected-warn>Failed to evaluate computed field. File:
'external-filter/department/accounting/0.json'. Computed Field Name: 'name'.
Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason:
'java.lang.NumberFormatException: For input string:
"department"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File:
'external-filter/department/engineering/0.json'. Computed Field Name: 'name'.
Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason:
'java.lang.NumberFormatException: For input string:
"department"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File:
'external-filter/department/hr/0.json'. Computed Field Name: 'name'. Computed
Field Type: 'bigint'. Computed Field Value: 'department'. Reason:
'java.lang.NumberFormatException: For input string:
"department"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File:
'external-filter/last-name/Jones/0.json'. Computed Field Name: 'name'. Computed
Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason:
'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File:
'external-filter/last-name/miller/0.json'. Computed Field Name: 'name'.
Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason:
'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File:
'external-filter/last-name/smith/0.json'. Computed Field Name: 'name'. Computed
Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason:
'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+ </compilation-unit>
+ </test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/empty-string-definition">
<placeholder name="adapter" value="S3" />
diff --git
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index ab2a978482..3ac665c311 100644
---
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -277,6 +277,7 @@ public enum ErrorCode implements IError {
ERROR_READING_ICEBERG_METADATA(1180),
UNSUPPORTED_COMPUTED_FIELD_TYPE(1181),
FAILED_TO_CALCULATE_COMPUTED_FIELDS(1182),
+ FAILED_TO_EVALUATE_COMPUTED_FIELD(1183),
// Feed errors
DATAFLOW_ILLEGAL_STATE(3001),
diff --git
a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index b51b09a20b..3c9543a435 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -279,6 +279,7 @@
1180 = Error reading iceberg data
1181 = Unsupported computed field type: %1$s
1182 = Failed to calculate computed fields: %1$s
+1183 = Failed to evaluate computed field. File: '%1$s'. Computed Field Name:
'%2$s'. Computed Field Type: '%3$s'. Computed Field Value: '%4$s'. Reason:
'%5$s'
# Feed Errors
3001 = Illegal state.
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
index 6e3556c15b..a07e067534 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
@@ -61,16 +61,21 @@ class ExternalFilterValueEvaluator implements
IExternalFilterValueEvaluator {
private void writeValue(ATypeTag typeTag, String stringValue) throws
HyracksDataException {
DataOutput output = value.getDataOutput();
SerializerDeserializerUtil.serializeTag(typeTag, output);
- switch (typeTag) {
- case TINYINT:
- case SMALLINT:
- case INTEGER:
- case BIGINT:
-
Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output);
- case DOUBLE:
-
DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output);
- case STRING:
- stringSerDer.serialize(stringValue, output);
+
+ try {
+ switch (typeTag) {
+ case TINYINT:
+ case SMALLINT:
+ case INTEGER:
+ case BIGINT:
+
Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output);
+ case DOUBLE:
+
DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output);
+ case STRING:
+ stringSerDer.serialize(stringValue, output);
+ }
+ } catch (Exception ex) {
+ throw HyracksDataException.create(ex);
}
}
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 9fa2b5a241..7ae992a5c5 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -58,7 +58,7 @@ public class AwsS3InputStreamFactory extends
AbstractExternalInputStreamFactory
IncludeExcludeMatcher includeExcludeMatcher =
ExternalDataUtils.getIncludeExcludeMatchers(configuration);
//Get a list of S3 objects
- ExternalDataPrefix externalDataPrefix = new
ExternalDataPrefix(configuration);
+ ExternalDataPrefix externalDataPrefix = new
ExternalDataPrefix(configuration, warningCollector);
configuration.put(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME,
externalDataPrefix.getRoot());
// TODO(htowaileb): Since we're using the root to load the files then
start filtering, it might end up being
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
index 2a707fb38c..66fa445d62 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
@@ -46,6 +46,10 @@ import org.apache.asterix.om.types.IAType;
import org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.NoOpWarningCollector;
+import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.LogRedactionUtil;
public class ExternalDataPrefix {
@@ -53,6 +57,7 @@ public class ExternalDataPrefix {
private String root;
private final boolean endsWithSlash;
private final List<String> segments;
+ private final IWarningCollector warningCollector;
private final List<String> computedFieldNames = new ArrayList<>();
private final List<IAType> computedFieldTypes = new ArrayList<>();
@@ -66,13 +71,20 @@ public class ExternalDataPrefix {
static {
supportedTypes.add(ATypeTag.STRING);
supportedTypes.add(ATypeTag.BIGINT);
+ supportedTypes.add(ATypeTag.DOUBLE);
}
public ExternalDataPrefix(Map<String, String> configuration) throws
AlgebricksException {
- this(getDefinitionOrPath(configuration));
+ this(getDefinitionOrPath(configuration), null);
}
- public ExternalDataPrefix(String prefix) throws AlgebricksException {
+ public ExternalDataPrefix(Map<String, String> configuration,
IWarningCollector warningCollector)
+ throws AlgebricksException {
+ this(getDefinitionOrPath(configuration), warningCollector);
+ }
+
+ public ExternalDataPrefix(String prefix, IWarningCollector
warningCollector) throws AlgebricksException {
+ this.warningCollector = warningCollector != null ? warningCollector :
NoOpWarningCollector.INSTANCE;
this.original = prefix != null ? prefix : "";
this.endsWithSlash = this.original.endsWith("/");
@@ -251,10 +263,27 @@ public class ExternalDataPrefix {
// extract values for all compute fields and set them in the evaluator
// TODO provide the List to avoid array creation
List<String> values = extractValues(keySegments);
- for (int i = 0; i < computedFieldNames.size(); i++) {
- if (evaluator.isComputedFieldUsed(i)) {
- evaluator.setValue(i, values.get(i));
+
+ String computedFieldName = null;
+ IAType computedFieldType = null;
+ String computedFieldValue = null;
+ try {
+ for (int i = 0; i < computedFieldNames.size(); i++) {
+ computedFieldName = computedFieldNames.get(i);
+ computedFieldType = computedFieldTypes.get(i);
+ computedFieldValue = values.get(i);
+
+ if (evaluator.isComputedFieldUsed(i)) {
+ evaluator.setValue(i, computedFieldValue);
+ }
+ }
+ } catch (HyracksDataException ex) {
+ if (warningCollector.shouldWarn()) {
+ warningCollector.warn(Warning.of(null,
ErrorCode.FAILED_TO_EVALUATE_COMPUTED_FIELD,
+ LogRedactionUtil.userData(key), computedFieldName,
computedFieldType,
+ LogRedactionUtil.userData(computedFieldValue),
LogRedactionUtil.userData(ex.getMessage())));
}
+ return false;
}
return evaluator.evaluate();