This is an automated email from the ASF dual-hosted git repository.

imaxon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 3b6982ce7fa50ff4005d50847a944ecbaf3ecb30
Author: Michael Blow <[email protected]>
AuthorDate: Thu May 20 16:43:43 2021 -0400

    [NO ISSUE][MISC] Improve parser error reporting
    
    Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Michael Blow <[email protected]>
    Reviewed-by: Till Westmann <[email protected]>
---
 ...stsuite_external_dataset_azure_blob_storage.xml | 10 ++++-----
 .../runtimets/testsuite_external_dataset_s3.xml    | 10 ++++-----
 ...testsuite_external_dataset_s3_one_partition.xml | 20 ++++++++---------
 .../reader/stream/QuotedLineRecordReader.java      |  2 +-
 .../reader/stream/SemiStructuredRecordReader.java  |  2 +-
 .../external/parser/AbstractJsonDataParser.java    | 18 +++++++---------
 .../external/parser/DelimitedDataParser.java       |  2 +-
 .../java/org/apache/hyracks}/util/ParseUtil.java   | 25 +++++++++++++++++++---
 .../src/main/resources/errormsg/en.properties      |  2 +-
 .../file/FieldCursorForDelimitedDataParser.java    |  5 ++---
 10 files changed, 56 insertions(+), 40 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
index df60e60..1e302da 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
@@ -143,11 +143,11 @@
       <compilation-unit name="common/malformed-json">
         <placeholder name="adapter" value="AZUREBLOB" />
         <output-dir compare="Text">common/malformed-json</output-dir>
-        <expected-error>Parsing error at malformed-data/duplicate-fields.json 
line 1 field field: Duplicate field 'field'</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json.json 
line 1 field field: Unexpected character ('}' (code 125)): was expecting 
double-quote to start field name</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json-2.json 
line 4 field array_f: Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json 
line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, 
Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json 
line 11 field array_f: Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/duplicate-fields.json 
line 1 field 'field': Duplicate field 'field'</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json.json 
line 1 field 'field': Unexpected character ('}' (code 125)): was expecting 
double-quote to start field name</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json-2.json 
line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json 
line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, 
Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json 
line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 9bc463c..b354e65 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -92,11 +92,11 @@
       <compilation-unit name="common/malformed-json">
         <placeholder name="adapter" value="S3" />
         <output-dir compare="Text">common/malformed-json</output-dir>
-        <expected-error>Parsing error at malformed-data/duplicate-fields.json 
line 1 field field: Duplicate field 'field'</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json.json 
line 1 field field: Unexpected character ('}' (code 125)): was expecting 
double-quote to start field name</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json-2.json 
line 4 field array_f: Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json 
line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, 
Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json 
line 11 field array_f: Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/duplicate-fields.json 
line 1 field 'field': Duplicate field 'field'</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json.json 
line 1 field 'field': Unexpected character ('}' (code 125)): was expecting 
double-quote to start field name</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json-2.json 
line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json 
line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, 
Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json 
line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid 
value (JSON String, Number, Array, Object or token 'null', 'true' or 
'false')</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
index 92b5e32..d02647d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
@@ -49,17 +49,17 @@
         <output-dir compare="Text">common/csv-warnings</output-dir>
         <expected-warn>Parsing error at data_dir/no_h_missing_fields.csv line 
2 field 3: some fields are missing</expected-warn>
         <expected-warn>Parsing error at data_dir/no_h_no_closing_q.csv line 2 
field 0: malformed input record ended abruptly</expected-warn>
-        <expected-warn>Parsing error at  line 2 field 0: malformed input 
record ended abruptly</expected-warn>
+        <expected-warn>Parsing error at line 2 field 0: malformed input record 
ended abruptly</expected-warn>
 
-        <expected-warn>Parsing error at  line 5 field 3: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 2 field 1: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 11 field 1: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 3 field 1: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 4 field 1: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 7 field 7: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 13 field 7: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 12 field 3: invalid 
value</expected-warn>
-        <expected-warn>Parsing error at  line 9 field 6: a quote should be in 
the beginning</expected-warn>
+        <expected-warn>Parsing error at line 5 field 3: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 2 field 1: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 11 field 1: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 3 field 1: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 4 field 1: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 7 field 7: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 13 field 7: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 12 field 3: invalid 
value</expected-warn>
+        <expected-warn>Parsing error at line 9 field 6: a quote should be in 
the beginning</expected-warn>
 
         <expected-warn>Parsing error at data_dir/h_invalid_values.csv line 5 
field 3: invalid value</expected-warn>
         <expected-warn>Parsing error at data_dir/h_invalid_values.csv line 2 
field 1: invalid value</expected-warn>
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 3a502d0..4c253bc 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -29,10 +29,10 @@ import java.util.Map;
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class QuotedLineRecordReader extends LineRecordReader {
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 2ff5cfa..0e23e46 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -40,10 +40,10 @@ import 
org.apache.asterix.common.exceptions.RuntimeDataException;
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class SemiStructuredRecordReader extends StreamRecordReader {
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
index 2bf0df4..2d20cc2 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.parser;
 
+import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR;
+
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.BitSet;
@@ -46,6 +48,7 @@ import 
org.apache.asterix.runtime.exceptions.UnsupportedTypeException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.util.ExceptionUtils;
 import org.apache.hyracks.data.std.api.IMutableValueStorage;
+import org.apache.hyracks.util.ParseUtil;
 
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonParseException;
@@ -433,18 +436,13 @@ public abstract class AbstractJsonDataParser extends 
AbstractNestedDataParser<AD
             }
             long lineNum = lineNumber.getAsLong() + 
jsonParser.getCurrentLocation().getLineNr() - 1;
             JsonStreamContext parsingContext = jsonParser.getParsingContext();
-            String fieldName = "N/A";
-            while (parsingContext != null) {
-                String currentFieldName = parsingContext.getCurrentName();
-                if (currentFieldName != null) {
-                    fieldName = currentFieldName;
-                    break;
-                }
+            String fieldName = null;
+            while (parsingContext != null && fieldName == null) {
+                fieldName = parsingContext.getCurrentName();
                 parsingContext = parsingContext.getParent();
             }
-
-            return 
HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR,
-                    dataSourceName.get(), lineNum, fieldName, msg);
+            final String locationDetails = ParseUtil.asLocationDetailString(dataSourceName.get(), lineNum, fieldName);
+            return HyracksDataException.create(PARSING_ERROR, locationDetails, msg);
         }
         return new 
RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e);
     }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index 60e6e77..590f51d 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -38,7 +38,6 @@ import org.apache.asterix.external.api.IRawRecord;
 import org.apache.asterix.external.api.IRecordDataParser;
 import org.apache.asterix.external.api.IStreamDataParser;
 import org.apache.asterix.external.util.ExternalDataConstants;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.asterix.om.base.AMutableString;
 import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils;
 import org.apache.asterix.om.types.ARecordType;
@@ -52,6 +51,7 @@ import 
org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
 import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser;
+import org.apache.hyracks.util.ParseUtil;
 
 public class DelimitedDataParser extends AbstractDataParser implements 
IStreamDataParser, IRecordDataParser<char[]> {
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
similarity index 56%
rename from 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
rename to 
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
index 5a46af7..63fec09 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
@@ -16,7 +16,9 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.asterix.external.util;
+package org.apache.hyracks.util;
+
+import java.util.StringJoiner;
 
 import org.apache.hyracks.api.exceptions.ErrorCode;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
@@ -29,7 +31,24 @@ public class ParseUtil {
 
     public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
             String warnMessage) {
-        warningCollector
-                .warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName, lineNum, fieldNum, warnMessage));
+        warningCollector.warn(Warning.of(null, ErrorCode.PARSING_ERROR,
+                asLocationDetailString(dataSourceName, lineNum, fieldNum), warnMessage));
+    }
+
+    public static String asLocationDetailString(String dataSource, long lineNum, Object fieldIdentifier) {
+        StringJoiner details = new StringJoiner(" ");
+        details.setEmptyValue("N/A");
+        if (dataSource != null && !dataSource.isEmpty()) {
+            details.add(dataSource);
+        }
+        if (lineNum >= 0) {
+            details.add("line " + lineNum);
+        }
+        if (fieldIdentifier instanceof Number) {
+            details.add("field " + fieldIdentifier);
+        } else if (fieldIdentifier instanceof String && !((String) fieldIdentifier).isEmpty()) {
+            details.add("field '" + fieldIdentifier + "'");
+        }
+        return "at " + details;
     }
 }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index 9f04fb2..4d9c60b 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -141,7 +141,7 @@
 121 = A numeric type promotion error has occurred: %1$s
 122 = Encountered an error while printing the plan: %1$s
 123 = Insufficient memory is provided for the join operators, please increase the join memory budget.
-124 = Parsing error at %1$s line %2$s field %3$s: %4$s
+124 = Parsing error %s: %s
 125 = Invalid inverted list type traits: %1$s
 126 = Illegal state. %1$s
 
diff --git 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 936d63e..ffc87cd 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -23,9 +23,8 @@ import java.io.Reader;
 import java.util.Arrays;
 import java.util.function.Supplier;
 
-import org.apache.hyracks.api.exceptions.ErrorCode;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.ParseUtil;
 
 public class FieldCursorForDelimitedDataParser {
 
@@ -448,6 +447,6 @@ public class FieldCursorForDelimitedDataParser {
     }
 
     private void warn(String message) {
-        warnings.warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName.get(), lineCount, fieldCount, message));
+        ParseUtil.warn(warnings, dataSourceName.get(), lineCount, fieldCount, message);
     }
 }

Reply via email to