This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 302ad471b7dca6a038c6e683c8ed7bea5063f169
Author: Ali Alsuliman <[email protected]>
AuthorDate: Thu May 21 00:59:50 2020 -0700

    [ASTERIXDB-2726][EXT] Report line number in errors of JSON parser
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Report line number in errors of JSON parser.
    
    Change-Id: I9481cfd4a9244591d22963781497390216df67bb
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6423
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Ali Alsuliman <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
---
 .../asterix-app/data/json/malformed-json-2.json    |  7 +++
 .../asterix-app/data/jsonl/malformed-jsonl-1.json  |  4 ++
 .../asterix-app/data/jsonl/malformed-jsonl-2.json  | 14 ++++++
 .../malformed-json.000.s3bucket.sqlpp              | 20 ++++++++
 .../s3/malformed-json/malformed-json.001.ddl.sqlpp | 36 ++++++++++++++
 .../malformed-json/malformed-json.002.query.sqlpp  | 21 +++++++++
 .../malformed-json.003.s3bucket.sqlpp              | 20 ++++++++
 .../malformed-json/malformed-json.004.query.sqlpp  | 21 +++++++++
 .../malformed-json.005.s3bucket.sqlpp              | 20 ++++++++
 .../malformed-json/malformed-json.006.query.sqlpp  | 21 +++++++++
 .../malformed-json.007.s3bucket.sqlpp              | 20 ++++++++
 .../malformed-json/malformed-json.008.query.sqlpp  | 21 +++++++++
 .../malformed-json.009.s3bucket.sqlpp              | 20 ++++++++
 .../malformed-json/malformed-json.010.query.sqlpp  | 21 +++++++++
 .../s3/malformed-json/malformed-json.099.ddl.sqlpp | 20 ++++++++
 .../runtimets/testsuite_external_dataset.xml       | 10 ++++
 .../reader/stream/SemiStructuredRecordReader.java  |  1 +
 .../external/input/stream/LocalFSInputStream.java  | 55 ++++++++++++++--------
 .../asterix/external/parser/JSONDataParser.java    | 47 +++++++++++++++++-
 19 files changed, 379 insertions(+), 20 deletions(-)

diff --git a/asterixdb/asterix-app/data/json/malformed-json-2.json 
b/asterixdb/asterix-app/data/json/malformed-json-2.json
new file mode 100644
index 0000000..88ef9f2
--- /dev/null
+++ b/asterixdb/asterix-app/data/json/malformed-json-2.json
@@ -0,0 +1,7 @@
+{"field": 1,
+  "field2": {
+    "nested1": 9,
+    "array_f": [1, 2,],
+    "nested2": 10
+  }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/jsonl/malformed-jsonl-1.json 
b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-1.json
new file mode 100644
index 0000000..7d15a9c
--- /dev/null
+++ b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-1.json
@@ -0,0 +1,4 @@
+{"field": 1, "field2": true}
+{"field": 2, "field2": false}
+{"field": 3, "field2": truee}
+{"field": 4, "field2": true}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/jsonl/malformed-jsonl-2.json 
b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-2.json
new file mode 100644
index 0000000..ebd3538
--- /dev/null
+++ b/asterixdb/asterix-app/data/jsonl/malformed-jsonl-2.json
@@ -0,0 +1,14 @@
+{"field": 1,
+  "field2": {
+    "nested1": 8,
+    "array_f": [1, 2],
+    "nested2": 9
+  }
+}
+{"field": 2,
+  "field2": {
+    "nested1": 88,
+    "array_f": [11, 22, ],
+    "nested2": 99
+  }
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.000.s3bucket.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.000.s3bucket.sqlpp
new file mode 100644
index 0000000..edae2a6
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.000.s3bucket.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// create S3 bucket with data
+playground malformed-data data/json/duplicate-fields.json
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.001.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.001.ddl.sqlpp
new file mode 100644
index 0000000..6ee0509
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.001.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+DROP TYPE t1 IF EXISTS;
+CREATE TYPE t1 AS {};
+
+DROP DATASET ds1 IF EXISTS;
+CREATE EXTERNAL DATASET ds1(t1) USING S3 (
+("accessKeyId"="dummyAccessKey"),
+("secretAccessKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="malformed-data"),
+("format"="json")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.002.query.sqlpp
new file mode 100644
index 0000000..3dedd2f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.002.query.sqlpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+USE test;
+
+FROM ds1 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.003.s3bucket.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.003.s3bucket.sqlpp
new file mode 100644
index 0000000..ea73e7e
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.003.s3bucket.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// create S3 bucket with data
+playground malformed-data data/json/malformed-json.json
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.004.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.004.query.sqlpp
new file mode 100644
index 0000000..3dedd2f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.004.query.sqlpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+USE test;
+
+FROM ds1 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.005.s3bucket.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.005.s3bucket.sqlpp
new file mode 100644
index 0000000..ef6d8df
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.005.s3bucket.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// create S3 bucket with data
+playground malformed-data data/json/malformed-json-2.json
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.006.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.006.query.sqlpp
new file mode 100644
index 0000000..3dedd2f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.006.query.sqlpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+USE test;
+
+FROM ds1 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.007.s3bucket.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.007.s3bucket.sqlpp
new file mode 100644
index 0000000..3c84eda
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.007.s3bucket.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// create S3 bucket with data
+playground malformed-data data/jsonl/malformed-jsonl-1.json
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.008.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.008.query.sqlpp
new file mode 100644
index 0000000..3dedd2f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.008.query.sqlpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+USE test;
+
+FROM ds1 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.009.s3bucket.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.009.s3bucket.sqlpp
new file mode 100644
index 0000000..25f0c8c
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.009.s3bucket.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// create S3 bucket with data
+playground malformed-data data/jsonl/malformed-jsonl-2.json
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.010.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.010.query.sqlpp
new file mode 100644
index 0000000..3dedd2f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.010.query.sqlpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+USE test;
+
+FROM ds1 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.099.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.099.ddl.sqlpp
new file mode 100644
index 0000000..36b2bab
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/malformed-json/malformed-json.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
index 5aa1326..b0346f8 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
@@ -72,5 +72,15 @@
         <output-dir compare="Text">aws/s3/over-1000-objects</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/malformed-json">
+        <output-dir compare="Text">aws/s3/malformed-json</output-dir>
+        <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+      </compilation-unit>
+    </test-case>
   </test-group>
 </test-suite>
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index dfc60bc..5f8d923 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -142,6 +142,7 @@ public class SemiStructuredRecordReader extends StreamRecordReader {
                         if (state == State.ARRAY || state == State.AFTER_COMMA) {
                             state = State.NESTED_OBJECT;
                         }
+                        beginLineNumber = lineNumber;
                         startPosn = bufferPosn;
                         hasStarted = true;
                         depth = 1;
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java
index 9e1b052..be6a331 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStream.java
@@ -18,11 +18,16 @@
  */
 package org.apache.asterix.external.input.stream;
 
+import static org.apache.asterix.common.exceptions.ErrorCode.ASTERIX;
+import static org.apache.asterix.common.exceptions.ErrorCode.INPUT_RECORD_READER_CHAR_ARRAY_RECORD_TOO_LARGE;
+import static org.apache.asterix.common.exceptions.ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM;
+import static org.apache.hyracks.api.exceptions.ErrorCode.HYRACKS;
+import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR;
+
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 
-import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.common.exceptions.ExceptionUtils;
 import org.apache.asterix.external.dataflow.AbstractFeedDataFlowController;
 import org.apache.asterix.external.util.FeedLogManager;
@@ -130,28 +135,29 @@ public class LocalFSInputStream extends AbstractMultipleInputStream {
         if (root instanceof HyracksDataException) {
             HyracksDataException r = (HyracksDataException) root;
             String component = r.getComponent();
-            if (ErrorCode.ASTERIX.equals(component)) {
-                int errorCode = r.getErrorCode();
+            boolean advance = false;
+            int errorCode = r.getErrorCode();
+            if (ASTERIX.equals(component)) {
                 switch (errorCode) {
-                    case ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM:
-                        if (currentFile != null) {
-                            try {
-                                logManager.logRecord(currentFile.getAbsolutePath(), "Corrupted input file");
-                            } catch (IOException e) {
-                                LOGGER.log(Level.WARN, "Filed to write to feed log file", e);
-                            }
-                            LOGGER.log(Level.WARN, "Corrupted input file: " + currentFile.getAbsolutePath());
-                        }
-                    case ErrorCode.INPUT_RECORD_READER_CHAR_ARRAY_RECORD_TOO_LARGE:
-                        try {
-                            advance();
-                            return true;
-                        } catch (Exception e) {
-                            LOGGER.log(Level.WARN, "An exception was thrown while trying to skip a file", e);
-                        }
+                    case RECORD_READER_MALFORMED_INPUT_STREAM:
+                        logCorruptedInput();
+                    case INPUT_RECORD_READER_CHAR_ARRAY_RECORD_TOO_LARGE:
+                        advance = true;
+                        break;
                     default:
                         break;
                 }
+            } else if (HYRACKS.equals(component) && errorCode == PARSING_ERROR) {
+                logCorruptedInput();
+                advance = true;
+            }
+            if (advance) {
+                try {
+                    advance();
+                    return true;
+                } catch (Exception e) {
+                    LOGGER.log(Level.WARN, "An exception was thrown while trying to skip a file", e);
+                }
             }
         }
         LOGGER.log(Level.WARN, "Failed to recover from failure", th);
@@ -167,4 +173,15 @@ public class LocalFSInputStream extends AbstractMultipleInputStream {
     public String getPreviousStreamName() {
         return lastFileName;
     }
+
+    private void logCorruptedInput() {
+        if (currentFile != null) {
+            try {
+                logManager.logRecord(currentFile.getAbsolutePath(), "Corrupted input file");
+            } catch (IOException e) {
+                LOGGER.log(Level.WARN, "Filed to write to feed log file", e);
+            }
+            LOGGER.log(Level.WARN, "Corrupted input file: " + currentFile.getAbsolutePath());
+        }
+    }
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
index 3216aef..8c518c4 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
@@ -22,6 +22,8 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.BitSet;
+import java.util.function.LongSupplier;
+import java.util.function.Supplier;
 
 import org.apache.asterix.builders.IARecordBuilder;
 import org.apache.asterix.builders.IAsterixListBuilder;
@@ -33,6 +35,7 @@ import org.apache.asterix.external.api.IStreamDataParser;
 import org.apache.asterix.external.parser.jackson.ADMToken;
 import org.apache.asterix.external.parser.jackson.GeometryCoParser;
 import org.apache.asterix.external.parser.jackson.ParserContext;
+import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.om.base.ABoolean;
 import org.apache.asterix.om.base.ANull;
 import org.apache.asterix.om.base.AUnorderedList;
@@ -45,10 +48,13 @@ import org.apache.asterix.om.types.IAType;
 import org.apache.asterix.om.utils.RecordUtil;
 import org.apache.asterix.runtime.exceptions.UnsupportedTypeException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.util.ExceptionUtils;
 import org.apache.hyracks.data.std.api.IMutableValueStorage;
 
 import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParseException;
 import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonStreamContext;
 import com.fasterxml.jackson.core.JsonToken;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.TreeTraversingParser;
@@ -63,6 +69,8 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken>
     protected final JsonFactory jsonFactory;
     protected final ARecordType rootType;
     protected final GeometryCoParser geometryCoParser;
+    private Supplier<String> dataSourceName;
+    private LongSupplier lineNumber;
 
     protected JsonParser jsonParser;
 
@@ -81,6 +89,8 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken>
         //GeometyCoParser to parse GeoJSON objects to AsterixDB internal spatial types.
         geometryCoParser = new GeometryCoParser(jsonParser);
         parserContext = new ParserContext();
+        this.dataSourceName = ExternalDataConstants.EMPTY_STRING;
+        this.lineNumber = ExternalDataConstants.NO_LINES;
     }
 
     /*
@@ -90,6 +100,12 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken>
      */
 
     @Override
+    public void configure(Supplier<String> dataSourceName, LongSupplier lineNumber) {
+        this.dataSourceName = dataSourceName == null ? ExternalDataConstants.EMPTY_STRING : dataSourceName;
+        this.lineNumber = lineNumber == null ? ExternalDataConstants.NO_LINES : lineNumber;
+    }
+
+    @Override
     public final boolean parse(IRawRecord<? extends char[]> record, DataOutput out) throws HyracksDataException {
         try {
             //TODO(wyk): find a way to reset byte[] instead of creating a new parser for each record.
@@ -99,7 +115,7 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken>
             parseObject(rootType, out);
             return true;
         } catch (IOException e) {
-            throw new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e);
+            throw createException(e);
         }
     }
 
@@ -460,4 +476,33 @@ public class JSONDataParser extends AbstractNestedDataParser<ADMToken>
 
         }
     }
+
+    private HyracksDataException createException(IOException e) {
+        if (jsonParser != null) {
+            String msg;
+            if (e instanceof JsonParseException) {
+                msg = ((JsonParseException) e).getOriginalMessage();
+            } else {
+                msg = ExceptionUtils.getRootCause(e).getMessage();
+            }
+            if (msg == null) {
+                msg = ErrorCode.getErrorMessage(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM);
+            }
+            long lineNum = lineNumber.getAsLong() + jsonParser.getCurrentLocation().getLineNr() - 1;
+            JsonStreamContext parsingContext = jsonParser.getParsingContext();
+            String fieldName = "N/A";
+            while (parsingContext != null) {
+                String currentFieldName = parsingContext.getCurrentName();
+                if (currentFieldName != null) {
+                    fieldName = currentFieldName;
+                    break;
+                }
+                parsingContext = parsingContext.getParent();
+            }
+
+            return HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR,
+                    dataSourceName.get(), lineNum, fieldName, msg);
+        }
+        return new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e);
+    }
 }

Reply via email to