This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 302ad471b7dca6a038c6e683c8ed7bea5063f169 Author: Ali Alsuliman <[email protected]> AuthorDate: Thu May 21 00:59:50 2020 -0700 [ASTERIXDB-2726][EXT] Report line number in errors of JSON parser - user model changes: no - storage format changes: no - interface changes: no Details: Report line number in errors of JSON parser. Change-Id: I9481cfd4a9244591d22963781497390216df67bb Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6423 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> --- .../asterix-app/data/json/malformed-json-2.json | 7 +++ .../asterix-app/data/jsonl/malformed-jsonl-1.json | 4 ++ .../asterix-app/data/jsonl/malformed-jsonl-2.json | 14 ++++++ .../malformed-json.000.s3bucket.sqlpp | 20 ++++++++ .../s3/malformed-json/malformed-json.001.ddl.sqlpp | 36 ++++++++++++++ .../malformed-json/malformed-json.002.query.sqlpp | 21 +++++++++ .../malformed-json.003.s3bucket.sqlpp | 20 ++++++++ .../malformed-json/malformed-json.004.query.sqlpp | 21 +++++++++ .../malformed-json.005.s3bucket.sqlpp | 20 ++++++++ .../malformed-json/malformed-json.006.query.sqlpp | 21 +++++++++ .../malformed-json.007.s3bucket.sqlpp | 20 ++++++++ .../malformed-json/malformed-json.008.query.sqlpp | 21 +++++++++ .../malformed-json.009.s3bucket.sqlpp | 20 ++++++++ .../malformed-json/malformed-json.010.query.sqlpp | 21 +++++++++ .../s3/malformed-json/malformed-json.099.ddl.sqlpp | 20 ++++++++ .../runtimets/testsuite_external_dataset.xml | 10 ++++ .../reader/stream/SemiStructuredRecordReader.java | 1 + .../external/input/stream/LocalFSInputStream.java | 55 ++++++++++++++-------- .../asterix/external/parser/JSONDataParser.java | 47 +++++++++++++++++- 19 files changed, 379 insertions(+), 20 deletions(-) diff --git a/asterixdb/asterix-app/data/json/malformed-json-2.json b/asterixdb/asterix-app/data/json/malformed-json-2.json new file mode 100644 index 0000000..88ef9f2 --- /dev/null +++ b/asterixdb/asterix-app/data/json/malformed-json-2.json @@ -0,0 +1,7 @@ +{"field": 1, + "field2": { + "nested1": 9, + "array_f": [1, 2,], + "nested2": 10 + } +} \ No newline at end of file diff --git a/asterixdb/asterix-app/data/jsonl/malformed-jsonl-1.json b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-1.json new file mode 100644 index 0000000..7d15a9c --- /dev/null +++ b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-1.json @@ -0,0 +1,4 @@ +{"field": 1, "field2": true} +{"field": 2, "field2": false} +{"field": 3, "field2": truee} +{"field": 4, "field2": true} \ No newline at end of file diff --git a/asterixdb/asterix-app/data/jsonl/malformed-jsonl-2.json b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-2.json new file mode 100644 index 0000000..ebd3538 --- /dev/null +++ b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-2.json @@ -0,0 +1,14 @@ +{"field": 1, + "field2": { + "nested1": 8, + "array_f": [1, 2], + "nested2": 9 + } +} +{"field": 2, + "field2": { + "nested1": 88, + "array_f": [11, 22, ], + "nested2": 99 + } +} \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.000.s3bucket.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.000.s3bucket.sqlpp new file mode 100644 index 0000000..edae2a6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.000.s3bucket.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +// create S3 bucket with data +playground malformed-data data/json/duplicate-fields.json \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.001.ddl.sqlpp new file mode 100644 index 0000000..6ee0509 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.001.ddl.sqlpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; +USE test; + +DROP TYPE t1 IF EXISTS; +CREATE TYPE t1 AS {}; + +DROP DATASET ds1 IF EXISTS; +CREATE EXTERNAL DATASET ds1(t1) USING S3 ( +("accessKeyId"="dummyAccessKey"), +("secretAccessKey"="dummySecretKey"), +("region"="us-west-2"), +("serviceEndpoint"="http://localhost:8001"), +("container"="playground"), +("definition"="malformed-data"), +("format"="json") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.002.query.sqlpp new file mode 100644 index 0000000..3dedd2f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.002.query.sqlpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +USE test; + +FROM ds1 v SELECT VALUE v; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.003.s3bucket.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.003.s3bucket.sqlpp new file mode 100644 index 0000000..ea73e7e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.003.s3bucket.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +// create S3 bucket with data +playground malformed-data data/json/malformed-json.json \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.004.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.004.query.sqlpp new file mode 100644 index 0000000..3dedd2f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.004.query.sqlpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +USE test; + +FROM ds1 v SELECT VALUE v; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.005.s3bucket.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.005.s3bucket.sqlpp new file mode 100644 index 0000000..ef6d8df --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.005.s3bucket.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +// create S3 bucket with data +playground malformed-data data/json/malformed-json-2.json \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.006.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.006.query.sqlpp new file mode 100644 index 0000000..3dedd2f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.006.query.sqlpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +USE test; + +FROM ds1 v SELECT VALUE v; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.007.s3bucket.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.007.s3bucket.sqlpp new file mode 100644 index 0000000..3c84eda --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.007.s3bucket.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +// create S3 bucket with data +playground malformed-data data/jsonl/malformed-jsonl-1.json \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.008.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.008.query.sqlpp new file mode 100644 index 0000000..3dedd2f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.008.query.sqlpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +USE test; + +FROM ds1 v SELECT VALUE v; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.009.s3bucket.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.009.s3bucket.sqlpp new file mode 100644 index 0000000..25f0c8c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.009.s3bucket.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +// create S3 bucket with data +playground malformed-data data/jsonl/malformed-jsonl-2.json \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.010.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.010.query.sqlpp new file mode 100644 index 0000000..3dedd2f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.010.query.sqlpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +USE test; + +FROM ds1 v SELECT VALUE v; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.099.ddl.sqlpp new file mode 100644 index 0000000..36b2bab --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.099.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml index 5aa1326..b0346f8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml @@ -72,5 +72,15 @@ <output-dir compare="Text">aws/s3/over-1000-objects</output-dir> </compilation-unit> </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="aws/s3/malformed-json"> + <output-dir compare="Text">aws/s3/malformed-json</output-dir> + <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error> + <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error> + <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error> + </compilation-unit> + </test-case> </test-group> </test-suite> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java index dfc60bc..5f8d923 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java @@ -142,6 +142,7 @@ public class SemiStructuredRecordReader extends StreamRecordReader { if (state == State.ARRAY || state == State.AFTER_COMMA) { state = State.NESTED_OBJECT; } + beginLineNumber = lineNumber; startPosn = bufferPosn; hasStarted = true; depth = 1; diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java index 9e1b052..be6a331 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java @@ -18,11 +18,16 @@ */ package org.apache.asterix.external.input.stream; +import static org.apache.asterix.common.exceptions.ErrorCode.ASTERIX; +import static org.apache.asterix.common.exceptions.ErrorCode.INPUT_RECORD_READER_CHAR_ARRAY_RECORD_TOO_LARGE; +import static org.apache.asterix.common.exceptions.ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM; +import static org.apache.hyracks.api.exceptions.ErrorCode.HYRACKS; +import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import org.apache.asterix.common.exceptions.ErrorCode; import org.apache.asterix.common.exceptions.ExceptionUtils; import org.apache.asterix.external.dataflow.AbstractFeedDataFlowController; import org.apache.asterix.external.util.FeedLogManager; @@ -130,28 +135,29 @@ public class LocalFSInputStream extends AbstractMultipleInputStream { if (root instanceof HyracksDataException) { HyracksDataException r = (HyracksDataException) root; String component = r.getComponent(); - if (ErrorCode.ASTERIX.equals(component)) { - int errorCode = r.getErrorCode(); + boolean advance = false; + int errorCode = r.getErrorCode(); + if (ASTERIX.equals(component)) { switch (errorCode) { - case ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM: - if (currentFile != null) { - try { - logManager.logRecord(currentFile.getAbsolutePath(), "Corrupted input file"); - } catch (IOException e) { - LOGGER.log(Level.WARN, "Filed to write to feed log file", e); - } - LOGGER.log(Level.WARN, "Corrupted input file: " + currentFile.getAbsolutePath()); - } - case ErrorCode.INPUT_RECORD_READER_CHAR_ARRAY_RECORD_TOO_LARGE: - try { - advance(); - return true; - } catch (Exception e) { - LOGGER.log(Level.WARN, "An exception was thrown while trying to skip a file", e); - } + case RECORD_READER_MALFORMED_INPUT_STREAM: + logCorruptedInput(); + case INPUT_RECORD_READER_CHAR_ARRAY_RECORD_TOO_LARGE: + advance = true; + break; default: break; } + } else if (HYRACKS.equals(component) && errorCode == PARSING_ERROR) { + logCorruptedInput(); + advance = true; + } + if (advance) { + try { + advance(); + return true; + } catch (Exception e) { + LOGGER.log(Level.WARN, "An exception was thrown while trying to skip a file", e); + } } } LOGGER.log(Level.WARN, "Failed to recover from failure", th); @@ -167,4 +173,15 @@ public class LocalFSInputStream extends AbstractMultipleInputStream { public String getPreviousStreamName() { return lastFileName; } + + private void logCorruptedInput() { + if (currentFile != null) { + try { + logManager.logRecord(currentFile.getAbsolutePath(), "Corrupted input file"); + } catch (IOException e) { + LOGGER.log(Level.WARN, "Filed to write to feed log file", e); + } + LOGGER.log(Level.WARN, "Corrupted input file: " + currentFile.getAbsolutePath()); + } + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java index 3216aef..8c518c4 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java @@ -22,6 +22,8 @@ import java.io.DataOutput; import java.io.IOException; import java.io.InputStream; import java.util.BitSet; +import java.util.function.LongSupplier; +import java.util.function.Supplier; import org.apache.asterix.builders.IARecordBuilder; import org.apache.asterix.builders.IAsterixListBuilder; @@ -33,6 +35,7 @@ import org.apache.asterix.external.api.IStreamDataParser; import org.apache.asterix.external.parser.jackson.ADMToken; import org.apache.asterix.external.parser.jackson.GeometryCoParser; import org.apache.asterix.external.parser.jackson.ParserContext; +import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.om.base.ABoolean; import org.apache.asterix.om.base.ANull; import org.apache.asterix.om.base.AUnorderedList; @@ -45,10 +48,13 @@ import org.apache.asterix.om.types.IAType; import org.apache.asterix.om.utils.RecordUtil; import org.apache.asterix.runtime.exceptions.UnsupportedTypeException; import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.api.util.ExceptionUtils; import org.apache.hyracks.data.std.api.IMutableValueStorage; import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonStreamContext; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.TreeTraversingParser; @@ -63,6 +69,8 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken> protected final JsonFactory jsonFactory; protected final ARecordType rootType; protected final GeometryCoParser geometryCoParser; + private Supplier<String> dataSourceName; + private LongSupplier lineNumber; protected JsonParser jsonParser; @@ -81,6 +89,8 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken> //GeometyCoParser to parse GeoJSON objects to AsterixDB internal spatial types. geometryCoParser = new GeometryCoParser(jsonParser); parserContext = new ParserContext(); + this.dataSourceName = ExternalDataConstants.EMPTY_STRING; + this.lineNumber = ExternalDataConstants.NO_LINES; } /* @@ -90,6 +100,12 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken> */ @Override + public void configure(Supplier<String> dataSourceName, LongSupplier lineNumber) { + this.dataSourceName = dataSourceName == null ? ExternalDataConstants.EMPTY_STRING : dataSourceName; + this.lineNumber = lineNumber == null ? ExternalDataConstants.NO_LINES : lineNumber; + } + + @Override public final boolean parse(IRawRecord<? extends char[]> record, DataOutput out) throws HyracksDataException { try { //TODO(wyk): find a way to reset byte[] instead of creating a new parser for each record. @@ -99,7 +115,7 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken> parseObject(rootType, out); return true; } catch (IOException e) { - throw new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e); + throw createException(e); } } @@ -460,4 +476,33 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken> } } + + private HyracksDataException createException(IOException e) { + if (jsonParser != null) { + String msg; + if (e instanceof JsonParseException) { + msg = ((JsonParseException) e).getOriginalMessage(); + } else { + msg = ExceptionUtils.getRootCause(e).getMessage(); + } + if (msg == null) { + msg = ErrorCode.getErrorMessage(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM); + } + long lineNum = lineNumber.getAsLong() + jsonParser.getCurrentLocation().getLineNr() - 1; + JsonStreamContext parsingContext = jsonParser.getParsingContext(); + String fieldName = "N/A"; + while (parsingContext != null) { + String currentFieldName = parsingContext.getCurrentName(); + if (currentFieldName != null) { + fieldName = currentFieldName; + break; + } + parsingContext = parsingContext.getParent(); + } + + return HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR, + dataSourceName.get(), lineNum, fieldName, msg); + } + return new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e); + } }
