This is an automated email from the ASF dual-hosted git repository. imaxon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 3b6982ce7fa50ff4005d50847a944ecbaf3ecb30 Author: Michael Blow <[email protected]> AuthorDate: Thu May 20 16:43:43 2021 -0400 [NO ISSUE][MISC] Improve parser error reporting Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Michael Blow <[email protected]> Reviewed-by: Till Westmann <[email protected]> --- ...stsuite_external_dataset_azure_blob_storage.xml | 10 ++++----- .../runtimets/testsuite_external_dataset_s3.xml | 10 ++++----- ...testsuite_external_dataset_s3_one_partition.xml | 20 ++++++++--------- .../reader/stream/QuotedLineRecordReader.java | 2 +- .../reader/stream/SemiStructuredRecordReader.java | 2 +- .../external/parser/AbstractJsonDataParser.java | 18 +++++++--------- .../external/parser/DelimitedDataParser.java | 2 +- .../java/org/apache/hyracks}/util/ParseUtil.java | 25 +++++++++++++++++++--- .../src/main/resources/errormsg/en.properties | 2 +- .../file/FieldCursorForDelimitedDataParser.java | 5 ++--- 10 files changed, 56 insertions(+), 40 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml index df60e60..1e302da 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml @@ -143,11 +143,11 @@ <compilation-unit name="common/malformed-json"> <placeholder name="adapter" value="AZUREBLOB" /> <output-dir compare="Text">common/malformed-json</output-dir> - <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error> - <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error> - <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> - <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> - <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field 'field': Duplicate field 'field'</expected-error> + <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field 'field': Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error> + <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> </compilation-unit> </test-case> <test-case FilePath="external-dataset"> diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 9bc463c..b354e65 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -92,11 +92,11 @@ <compilation-unit name="common/malformed-json"> <placeholder name="adapter" value="S3" /> <output-dir compare="Text">common/malformed-json</output-dir> - <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error> - <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error> - <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> - <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> - <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field 'field': Duplicate field 'field'</expected-error> + <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field 'field': Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error> + <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> </compilation-unit> </test-case> <test-case FilePath="external-dataset"> diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml index 92b5e32..d02647d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml @@ -49,17 +49,17 @@ <output-dir compare="Text">common/csv-warnings</output-dir> <expected-warn>Parsing error at data_dir/no_h_missing_fields.csv line 2 field 3: some fields are missing</expected-warn> <expected-warn>Parsing error at data_dir/no_h_no_closing_q.csv line 2 field 0: malformed input record ended abruptly</expected-warn> - <expected-warn>Parsing error at line 2 field 0: malformed input record ended abruptly</expected-warn> + <expected-warn>Parsing error at line 2 field 0: malformed input record ended abruptly</expected-warn> - <expected-warn>Parsing error at line 5 field 3: invalid value</expected-warn> - <expected-warn>Parsing error at line 2 field 1: invalid value</expected-warn> - <expected-warn>Parsing error at line 11 field 1: invalid value</expected-warn> - <expected-warn>Parsing error at line 3 field 1: invalid value</expected-warn> - <expected-warn>Parsing error at line 4 field 1: invalid value</expected-warn> - <expected-warn>Parsing error at line 7 field 7: invalid value</expected-warn> - <expected-warn>Parsing error at line 13 field 7: invalid value</expected-warn> - <expected-warn>Parsing error at line 12 field 3: invalid value</expected-warn> - <expected-warn>Parsing error at line 9 field 6: a quote should be in the beginning</expected-warn> + <expected-warn>Parsing error at line 5 field 3: invalid value</expected-warn> + <expected-warn>Parsing error at line 2 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at line 11 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at line 3 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at line 4 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at line 7 field 7: invalid value</expected-warn> + <expected-warn>Parsing error at line 13 field 7: invalid value</expected-warn> + <expected-warn>Parsing error at line 12 field 3: invalid value</expected-warn> + <expected-warn>Parsing error at line 9 field 6: a quote should be in the beginning</expected-warn> <expected-warn>Parsing error at data_dir/h_invalid_values.csv line 5 field 3: invalid value</expected-warn> <expected-warn>Parsing error at data_dir/h_invalid_values.csv line 2 field 1: invalid value</expected-warn> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java index 3a502d0..4c253bc 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java @@ -29,10 +29,10 @@ import java.util.Map; import org.apache.asterix.external.api.AsterixInputStream; import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.util.ExternalDataUtils; -import org.apache.asterix.external.util.ParseUtil; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.exceptions.IWarningCollector; +import org.apache.hyracks.util.ParseUtil; public class QuotedLineRecordReader extends LineRecordReader { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java index 2ff5cfa..0e23e46 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java @@ -40,10 +40,10 @@ import org.apache.asterix.common.exceptions.RuntimeDataException; import org.apache.asterix.external.api.AsterixInputStream; import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.util.ExternalDataUtils; -import org.apache.asterix.external.util.ParseUtil; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.exceptions.IWarningCollector; +import org.apache.hyracks.util.ParseUtil; public class SemiStructuredRecordReader extends StreamRecordReader { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java index 2bf0df4..2d20cc2 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.external.parser; +import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR; + import java.io.DataOutput; import java.io.IOException; import java.util.BitSet; @@ -46,6 +48,7 @@ import org.apache.asterix.runtime.exceptions.UnsupportedTypeException; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.util.ExceptionUtils; import org.apache.hyracks.data.std.api.IMutableValueStorage; +import org.apache.hyracks.util.ParseUtil; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParseException; @@ -433,18 +436,13 @@ public abstract class AbstractJsonDataParser extends AbstractNestedDataParser<AD } long lineNum = lineNumber.getAsLong() + jsonParser.getCurrentLocation().getLineNr() - 1; JsonStreamContext parsingContext = jsonParser.getParsingContext(); - String fieldName = "N/A"; - while (parsingContext != null) { - String currentFieldName = parsingContext.getCurrentName(); - if (currentFieldName != null) { - fieldName = currentFieldName; - break; - } + String fieldName = null; + while (parsingContext != null && fieldName == null) { + fieldName = parsingContext.getCurrentName(); parsingContext = parsingContext.getParent(); } - - return HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR, - dataSourceName.get(), lineNum, fieldName, msg); + final String locationDetails = ParseUtil.asLocationDetailString(dataSourceName.get(), lineNum, fieldName); + return HyracksDataException.create(PARSING_ERROR, locationDetails, msg); } return new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java index 60e6e77..590f51d 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java @@ -38,7 +38,6 @@ import org.apache.asterix.external.api.IRawRecord; import org.apache.asterix.external.api.IRecordDataParser; import org.apache.asterix.external.api.IStreamDataParser; import org.apache.asterix.external.util.ExternalDataConstants; -import org.apache.asterix.external.util.ParseUtil; import org.apache.asterix.om.base.AMutableString; import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils; import org.apache.asterix.om.types.ARecordType; @@ -52,6 +51,7 @@ import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; import org.apache.hyracks.dataflow.common.data.parsers.IValueParser; import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory; import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser; +import org.apache.hyracks.util.ParseUtil; public class DelimitedDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java similarity index 56% rename from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java rename to hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java index 5a46af7..63fec09 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java +++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java @@ -16,7 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.asterix.external.util; +package org.apache.hyracks.util; + +import java.util.StringJoiner; import org.apache.hyracks.api.exceptions.ErrorCode; import org.apache.hyracks.api.exceptions.IWarningCollector; @@ -29,7 +31,24 @@ public class ParseUtil { public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum, String warnMessage) { - warningCollector - .warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName, lineNum, fieldNum, warnMessage)); + warningCollector.warn(Warning.of(null, ErrorCode.PARSING_ERROR, + asLocationDetailString(dataSourceName, lineNum, fieldNum), warnMessage)); + } + + public static String asLocationDetailString(String dataSource, long lineNum, Object fieldIdentifier) { + StringJoiner details = new StringJoiner(" "); + details.setEmptyValue("N/A"); + if (dataSource != null && !dataSource.isEmpty()) { + details.add(dataSource); + } + if (lineNum >= 0) { + details.add("line " + lineNum); + } + if (fieldIdentifier instanceof Number) { + details.add("field " + fieldIdentifier); + } else if (fieldIdentifier instanceof String && !((String) fieldIdentifier).isEmpty()) { + details.add("field '" + fieldIdentifier + "'"); + } + return "at " + details; } } diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties index 9f04fb2..4d9c60b 100644 --- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties +++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties @@ -141,7 +141,7 @@ 121 = A numeric type promotion error has occurred: %1$s 122 = Encountered an error while printing the plan: %1$s 123 = Insufficient memory is provided for the join operators, please increase the join memory budget. -124 = Parsing error at %1$s line %2$s field %3$s: %4$s +124 = Parsing error %s: %s 125 = Invalid inverted list type traits: %1$s 126 = Illegal state. %1$s diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java index 936d63e..ffc87cd 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java @@ -23,9 +23,8 @@ import java.io.Reader; import java.util.Arrays; import java.util.function.Supplier; -import org.apache.hyracks.api.exceptions.ErrorCode; import org.apache.hyracks.api.exceptions.IWarningCollector; -import org.apache.hyracks.api.exceptions.Warning; +import org.apache.hyracks.util.ParseUtil; public class FieldCursorForDelimitedDataParser { @@ -448,6 +447,6 @@ public class FieldCursorForDelimitedDataParser { } private void warn(String message) { - warnings.warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName.get(), lineCount, fieldCount, message)); + ParseUtil.warn(warnings, dataSourceName.get(), lineCount, fieldCount, message); } }
