>From Hussain Towaileb <[email protected]>:

Hussain Towaileb has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17715 )


Change subject: [ASTERIXDB-3243][EXT]: Issue warning and skip file on computed 
field type mismatch
......................................................................

[ASTERIXDB-3243][EXT]: Issue warning and skip file on computed field type 
mismatch

Change-Id: I3208bcd8b59a0ca6351053bc3ce06e76ecb1d462
---
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
M 
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M 
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
M 
asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/filter/001/001.026.query.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
M asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
11 files changed, 143 insertions(+), 25 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/15/17715/1

diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
index efa4c79..7b00240 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
@@ -49,7 +49,7 @@
         assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());

         String prefix1 = "";
-        prefix = new ExternalDataPrefix(prefix1);
+        prefix = new ExternalDataPrefix(prefix1, null);
         assertEquals("", prefix.getOriginal());
         assertEquals("", prefix.getRoot());
         assertFalse(prefix.isEndsWithSlash());
@@ -60,7 +60,7 @@
         assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());

         String prefix2 = "hotel";
-        prefix = new ExternalDataPrefix(prefix2);
+        prefix = new ExternalDataPrefix(prefix2, null);
         assertEquals("hotel", prefix.getOriginal());
         assertEquals("hotel", prefix.getRoot());
         assertFalse(prefix.isEndsWithSlash());
@@ -71,7 +71,7 @@
         assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());

         String prefix3 = "hotel/{hotel-id:inT}/";
-        prefix = new ExternalDataPrefix(prefix3);
+        prefix = new ExternalDataPrefix(prefix3, null);
         assertEquals("hotel/{hotel-id:inT}/", prefix.getOriginal());
         assertEquals("hotel/", prefix.getRoot());
         assertTrue(prefix.isEndsWithSlash());
@@ -82,7 +82,7 @@
         assertEquals("(.+)", 
prefix.getIndexToComputedFieldsMap().get(1).getExpression());

         String prefix4 = "hotel/{hotel-id:int}-{hotel-name:sTRing}";
-        prefix = new ExternalDataPrefix(prefix4);
+        prefix = new ExternalDataPrefix(prefix4, null);
         assertEquals("hotel/{hotel-id:int}-{hotel-name:sTRing}", 
prefix.getOriginal());
         assertEquals("hotel", prefix.getRoot());
         assertFalse(prefix.isEndsWithSlash());
@@ -93,7 +93,7 @@
         assertEquals("(.+)-(.+)", 
prefix.getIndexToComputedFieldsMap().get(1).getExpression());

         String prefix5 = 
"hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/";
-        prefix = new ExternalDataPrefix(prefix5);
+        prefix = new ExternalDataPrefix(prefix5, null);
         
assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/",
                 prefix.getOriginal());
         assertEquals("hotel/something/", prefix.getRoot());
@@ -107,7 +107,7 @@
         assertEquals("(.+)-(.+)-(.+)", 
prefix.getIndexToComputedFieldsMap().get(4).getExpression());

         String prefix6 = 
"hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}";
-        prefix = new ExternalDataPrefix(prefix6);
+        prefix = new ExternalDataPrefix(prefix6, null);
         
assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}",
                 prefix.getOriginal());
         assertEquals("hotel/something", prefix.getRoot());
@@ -123,7 +123,7 @@
         assertEquals("(.+)", 
prefix.getIndexToComputedFieldsMap().get(6).getExpression());

         String prefix7 = "hotel/{hotel.details.id:int}-{hotel-name:sTRing}";
-        prefix = new ExternalDataPrefix(prefix7);
+        prefix = new ExternalDataPrefix(prefix7, null);
         assertEquals("hotel/{hotel.details.id:int}-{hotel-name:sTRing}", 
prefix.getOriginal());
         assertEquals("hotel", prefix.getRoot());
         assertFalse(prefix.isEndsWithSlash());
@@ -134,7 +134,7 @@

         String prefix8 =
                 
"hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day";
-        prefix = new ExternalDataPrefix(prefix8);
+        prefix = new ExternalDataPrefix(prefix8, null);
         assertEquals(
                 
"hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day",
                 prefix.getOriginal());
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/filter/001/001.026.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/filter/001/001.026.query.sqlpp
index c284cf7..8262518 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/filter/001/001.026.query.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/filter/001/001.026.query.sqlpp
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
--- param max-warnings:json=1000
+ˇˇ
 USE test;
 SET `compiler.column.filter` "true";

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
new file mode 100644
index 0000000..7b70bd3
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE test AS {
+};
+
+
+CREATE EXTERNAL DATASET test(test) USING S3 (
+    ("accessKeyId"="dummyAccessKey"),
+    ("secretAccessKey"="dummySecretKey"),
+    ("region"="us-west-2"),
+    ("serviceEndpoint"="http://127.0.0.1:8001";),
+    ("container"="playground"),
+    ("definition"="external-filter/{name:bigint}"),
+    ("format"="json")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
new file mode 100644
index 0000000..13b6abe
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// param max-warnings:json=10
+
+USE test;
+
+SELECT value count(*)
+FROM test t
+WHERE t.name = "department";
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
new file mode 100644
index 0000000..c227083
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 4414ebf..e50ec3c 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -226,6 +226,17 @@
         <output-dir compare="Text">one-field</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="external-dataset/s3/filter" check-warnings="true">
+      <compilation-unit name="type-mismatch">
+        <output-dir compare="Text">type-mismatch</output-dir>
+        <expected-warn>Failed to evaluate computed field. File: 
'external-filter/department/accounting/0.json'. Computed Field Name: 'name'. 
Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 
'java.lang.NumberFormatException: For input string: 
"department"'</expected-warn>
+        <expected-warn>Failed to evaluate computed field. File: 
'external-filter/department/engineering/0.json'. Computed Field Name: 'name'. 
Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 
'java.lang.NumberFormatException: For input string: 
"department"'</expected-warn>
+        <expected-warn>Failed to evaluate computed field. File: 
'external-filter/department/hr/0.json'. Computed Field Name: 'name'. Computed 
Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 
'java.lang.NumberFormatException: For input string: 
"department"'</expected-warn>
+        <expected-warn>Failed to evaluate computed field. File: 
'external-filter/last-name/Jones/0.json'. Computed Field Name: 'name'. Computed 
Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 
'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+        <expected-warn>Failed to evaluate computed field. File: 
'external-filter/last-name/miller/0.json'. Computed Field Name: 'name'. 
Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 
'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+        <expected-warn>Failed to evaluate computed field. File: 
'external-filter/last-name/smith/0.json'. Computed Field Name: 'name'. Computed 
Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 
'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+      </compilation-unit>
+    </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/empty-string-definition">
         <placeholder name="adapter" value="S3" />
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index ab2a978..3ac665c 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -277,6 +277,7 @@
     ERROR_READING_ICEBERG_METADATA(1180),
     UNSUPPORTED_COMPUTED_FIELD_TYPE(1181),
     FAILED_TO_CALCULATE_COMPUTED_FIELDS(1182),
+    FAILED_TO_EVALUATE_COMPUTED_FIELD(1183),

     // Feed errors
     DATAFLOW_ILLEGAL_STATE(3001),
diff --git 
a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties 
b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index b51b09a..3c9543a 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -279,6 +279,7 @@
 1180 = Error reading iceberg data
 1181 = Unsupported computed field type: %1$s
 1182 = Failed to calculate computed fields: %1$s
+1183 = Failed to evaluate computed field. File: '%1$s'. Computed Field Name: 
'%2$s'. Computed Field Type: '%3$s'. Computed Field Value: '%4$s'. Reason: 
'%5$s'

 # Feed Errors
 3001 = Illegal state.
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
index 6e3556c..a07e067 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
@@ -61,16 +61,21 @@
     private void writeValue(ATypeTag typeTag, String stringValue) throws 
HyracksDataException {
         DataOutput output = value.getDataOutput();
         SerializerDeserializerUtil.serializeTag(typeTag, output);
-        switch (typeTag) {
-            case TINYINT:
-            case SMALLINT:
-            case INTEGER:
-            case BIGINT:
-                
Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output);
-            case DOUBLE:
-                
DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output);
-            case STRING:
-                stringSerDer.serialize(stringValue, output);
+
+        try {
+            switch (typeTag) {
+                case TINYINT:
+                case SMALLINT:
+                case INTEGER:
+                case BIGINT:
+                    
Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output);
+                case DOUBLE:
+                    
DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output);
+                case STRING:
+                    stringSerDer.serialize(stringValue, output);
+            }
+        } catch (Exception ex) {
+            throw HyracksDataException.create(ex);
         }
     }
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 9fa2b5a..7ae992a 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -58,7 +58,7 @@
         IncludeExcludeMatcher includeExcludeMatcher = 
ExternalDataUtils.getIncludeExcludeMatchers(configuration);

         //Get a list of S3 objects
-        ExternalDataPrefix externalDataPrefix = new 
ExternalDataPrefix(configuration);
+        ExternalDataPrefix externalDataPrefix = new 
ExternalDataPrefix(configuration, warningCollector);
         configuration.put(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME, 
externalDataPrefix.getRoot());

         // TODO(htowaileb): Since we're using the root to load the files then 
start filtering, it might end up being
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
index 2a707fb..26d66dc 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
@@ -46,6 +46,10 @@
 import org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.NoOpWarningCollector;
+import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.LogRedactionUtil;

 public class ExternalDataPrefix {

@@ -53,6 +57,7 @@
     private String root;
     private final boolean endsWithSlash;
     private final List<String> segments;
+    private final IWarningCollector warningCollector;

     private final List<String> computedFieldNames = new ArrayList<>();
     private final List<IAType> computedFieldTypes = new ArrayList<>();
@@ -69,10 +74,16 @@
     }

     public ExternalDataPrefix(Map<String, String> configuration) throws 
AlgebricksException {
-        this(getDefinitionOrPath(configuration));
+        this(getDefinitionOrPath(configuration), null);
     }

-    public ExternalDataPrefix(String prefix) throws AlgebricksException {
+    public ExternalDataPrefix(Map<String, String> configuration, 
IWarningCollector warningCollector)
+            throws AlgebricksException {
+        this(getDefinitionOrPath(configuration), warningCollector);
+    }
+
+    public ExternalDataPrefix(String prefix, IWarningCollector 
warningCollector) throws AlgebricksException {
+        this.warningCollector = warningCollector != null ? warningCollector : 
NoOpWarningCollector.INSTANCE;
         this.original = prefix != null ? prefix : "";
         this.endsWithSlash = this.original.endsWith("/");

@@ -251,10 +262,27 @@
         // extract values for all compute fields and set them in the evaluator
         // TODO provide the List to avoid array creation
         List<String> values = extractValues(keySegments);
-        for (int i = 0; i < computedFieldNames.size(); i++) {
-            if (evaluator.isComputedFieldUsed(i)) {
-                evaluator.setValue(i, values.get(i));
+
+        String computedFieldName = null;
+        IAType computedFieldType = null;
+        String computedFieldValue = null;
+        try {
+            for (int i = 0; i < computedFieldNames.size(); i++) {
+                computedFieldName = computedFieldNames.get(i);
+                computedFieldType = computedFieldTypes.get(i);
+                computedFieldValue = values.get(i);
+
+                if (evaluator.isComputedFieldUsed(i)) {
+                    evaluator.setValue(i, computedFieldValue);
+                }
             }
+        } catch (HyracksDataException ex) {
+            if (warningCollector.shouldWarn()) {
+                warningCollector.warn(Warning.of(null, 
ErrorCode.FAILED_TO_EVALUATE_COMPUTED_FIELD,
+                        LogRedactionUtil.userData(key), computedFieldName, 
computedFieldType,
+                        LogRedactionUtil.userData(computedFieldValue), 
LogRedactionUtil.userData(ex.getMessage())));
+            }
+            return false;
         }

         return evaluator.evaluate();

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17715
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I3208bcd8b59a0ca6351053bc3ce06e76ecb1d462
Gerrit-Change-Number: 17715
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-MessageType: newchange

Reply via email to