[asterixdb] 05/20: [ASTERIXDB-2713][EXT] Add CSV & TSV support for external dataset

mblow Wed, 13 May 2020 17:06:32 -0700

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


commit e91d2c6847829b22bfb9f8b7917cea1000a61980
Author: Ali Alsuliman <[email protected]>
AuthorDate: Sun Apr 12 14:05:32 2020 -0700

    [ASTERIXDB-2713][EXT] Add CSV & TSV support for external dataset
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Add CSV support for external dataset.
     - support S3
     - add boolean parser to Hyracks
    
    Change-Id: Id1790fa73461e9f4a5fb443c51c1905ac588cee6
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/5743
    Tested-by: Jenkins <[email protected]>
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
---
 asterixdb/asterix-app/data/csv/01.csv              |   3 +
 asterixdb/asterix-app/data/csv/02.csv              |   3 +
 asterixdb/asterix-app/data/csv/sample_09.csv       |  17 ++
 asterixdb/asterix-app/data/csv/sample_10.csv       |  39 +++++
 asterixdb/asterix-app/data/csv/sample_11.csv       |   4 +
 asterixdb/asterix-app/data/csv/sample_12.csv       |  15 ++
 asterixdb/asterix-app/data/tsv/01.tsv              |   3 +
 asterixdb/asterix-app/data/tsv/02.tsv              |   3 +
 asterixdb/asterix-app/data/tsv/sample_01.tsv       |  28 ++++
 .../asterix/app/translator/QueryTranslator.java    |   2 +
 .../aws/AwsS3ExternalDatasetTest.java              |  27 ++++
 .../csv-parser-001/csv-parser-001.1.ddl.sqlpp      |  32 ++++
 .../csv-parser-001/csv-parser-001.2.query.sqlpp    |  22 +++
 .../csv-parser-001/csv-parser-001.3.query.sqlpp    |  22 +++
 .../csv-parser-001/csv-parser-001.4.query.sqlpp    |  22 +++
 .../csv-parser-001/csv-parser-001.5.query.sqlpp    |  22 +++
 .../csv-parser-001/csv-parser-001.6.ddl.sqlpp      |  20 +++
 .../tsv-parser-001/tsv-parser-002.1.ddl.sqlpp      |  27 ++++
 .../tsv-parser-001/tsv-parser-002.2.query.sqlpp    |  22 +++
 .../tsv-parser-001/tsv-parser-002.3.ddl.sqlpp      |  20 +++
 .../aws/s3/001/query-dataset.000.ddl.sqlpp         |  36 +++++
 .../aws/s3/001/query-dataset.002.query.sqlpp       |  22 +++
 .../aws/s3/001/query-dataset.003.ddl.sqlpp         |  20 +++
 .../aws/s3/002/query-dataset.000.ddl.sqlpp         |  36 +++++
 .../aws/s3/002/query-dataset.002.query.sqlpp       |  22 +++
 .../aws/s3/002/query-dataset.003.ddl.sqlpp         |  20 +++
 .../csv-parser-001/csv-parser-001.2.adm            |  15 ++
 .../csv-parser-001/csv-parser-001.3.adm            |  32 ++++
 .../csv-parser-001/csv-parser-001.4.adm            |   4 +
 .../csv-parser-001/csv-parser-001.5.adm            |  13 ++
 .../tsv-parser-001/tsv-parser-001.2.adm            |  28 ++++
 .../aws/s3/001/external_dataset.001.adm            |   6 +
 .../aws/s3/002/external_dataset.001.adm            |   6 +
 .../runtimets/testsuite_external_dataset.xml       |  10 ++
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  12 ++
 .../CSVToRecordWithMetadataAndPKConverter.java     |  13 +-
 .../record/reader/aws/AwsS3InputStreamFactory.java |  17 +-
 .../record/reader/stream/LineRecordReader.java     |   5 +-
 .../reader/stream/QuotedLineRecordReader.java      |  67 +++++---
 .../external/parser/DelimitedDataParser.java       |  31 ++--
 .../parser/factory/DelimitedDataParserFactory.java |  44 +----
 .../external/provider/AdapterFactoryProvider.java  |  15 +-
 .../util/ExternalDataCompatibilityUtils.java       |  17 +-
 .../external/util/ExternalDataConstants.java       |   8 +
 .../asterix/external/util/ExternalDataUtils.java   |  88 +++++-----
 .../common/data/parsers/BooleanParserFactory.java  |  62 +++++++
 .../std/file/DelimitedDataTupleParserFactory.java  |   9 +-
 .../file/FieldCursorForDelimitedDataParser.java    | 179 ++++++++++++---------
 .../hyracks/dataflow/std/file/CursorTest.java      |   5 +-
 49 files changed, 957 insertions(+), 238 deletions(-)

diff --git a/asterixdb/asterix-app/data/csv/01.csv 
b/asterixdb/asterix-app/data/csv/01.csv
new file mode 100644
index 0000000..6957e76
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/01.csv
@@ -0,0 +1,3 @@
+1,,"good","recommend"
+2,,"bad","not recommend"
+3,,"good",
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/csv/02.csv 
b/asterixdb/asterix-app/data/csv/02.csv
new file mode 100644
index 0000000..630843f
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/02.csv
@@ -0,0 +1,3 @@
+4,2018,"good","recommend"
+5,2018,,"not recommend"
+6,2018,"good",
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/csv/sample_09.csv 
b/asterixdb/asterix-app/data/csv/sample_09.csv
new file mode 100644
index 0000000..b14219d
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/sample_09.csv
@@ -0,0 +1,17 @@
+a,b,c,d,e
+0,",     boo", 1,2,3
+1,"","",❤,
+2,3,4,\n,
+3,"quoted ""f"" field",,,
+4,4,,,
+5,"{""vehicle"": ""car"", ""location"": [2.0, 0.1]}",,,
+6,2,3,,
+7,8,9,,
+8,2,3,,
+9,8,9,,
+10,"field
+""f""
+with multiple lines",,,
+11,4,,,
+12,5,ʤ,,
+John,Green,111 downtown st.,"city, state",99999
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/csv/sample_10.csv 
b/asterixdb/asterix-app/data/csv/sample_10.csv
new file mode 100644
index 0000000..3beee08
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/sample_10.csv
@@ -0,0 +1,39 @@
+1,"?/ Text ending with a backslash  / \",2000-09-03 07:12:22
+2,non quoted text!yes......,2003-08-09 22:34:19
+3,Text with more sentences. Another sentence.,2003-09-12 05:29:12
+4,"Quoted text.. yes.",2003-09-13 17:21:49
+5,Another text,2003-01-21 23:31:41
+6,Text with' quotes.,2003-09-14 20:15:50
+7,Text with quote's,2003-09-14 18:34:03
+8,"Text with quotes '",2003-01-28 20:32:13
+9,"Text with quotes """,2003-01-18 11:44:15
+10,Text with question marks!?!?,2003-09-18 06:25:56
+11,""" Text that starts with quotes",2003-09-12 00:31:24
+12,"Text with \"" backslash and quotes",2003-09-13 20:30:06
+13,"Text with \"" backslash and quotes\""",2003-09-14 16:20:36
+14,"Text that has comma ,",2003-09-12 08:21:18
+15,"Text that has "","" quoted comma",2003-09-12 08:21:18
+16,",Text that has ",2003-09-12 08:21:18
+17,","",Text that has ",2003-09-12 08:21:18
+18,"Text with commas,inside it., yes",2003-09-13 23:42:14
+19,"Text that has \n inside ",2003-09-12 08:21:18
+20,"Text that has \\\n inside ",2003-09-12 08:21:18
+21,text with :),2003-09-05 19:15:34
+22,"Text that has \\\"" inside \\",2003-09-12 08:21:18
+23,"Text that has \\\"" inside \\""",2003-09-12 08:21:18
+24,"""text that spans multiple
+Lines and more
+Lines ane more and more
+Lines ...
+And yet more lines
+And more""",2011-09-19 01:09:09
+25,"Text ""
+more lines",2011-09-19 01:09:09
+26,"""
+",2011-09-19 01:09:09
+27,"Text",""
+28,"Text","2011-09-19 01:09:09"
+29,"Text\.","2011-09-19 01:09:09"
+30,Text\.,"2011-09-19 01:09:09"
+31,"\.Text","2011-09-19 01:09:09"
+32,\.Text,"2011-09-19 01:09:09"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/csv/sample_11.csv 
b/asterixdb/asterix-app/data/csv/sample_11.csv
new file mode 100644
index 0000000..b9a9571
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/sample_11.csv
@@ -0,0 +1,4 @@
+1,","", b", 3,4,5
+","", b",4, 3,4,5
+,,,,
+"dd",,,,
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/csv/sample_12.csv 
b/asterixdb/asterix-app/data/csv/sample_12.csv
new file mode 100644
index 0000000..2ab7c6d
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/sample_12.csv
@@ -0,0 +1,15 @@
+1,true,"text"
+2,false,"text"
+3,true,"text"
+4,true,""
+5,false,
+6,true,"text""
+more lines"
+7,false,"""
+"
+8,true,""
+9,false,"text"""
+10,false,text\.
+11,true,"text\."
+,false,\.text
+13,true,"\.text"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/tsv/01.tsv 
b/asterixdb/asterix-app/data/tsv/01.tsv
new file mode 100644
index 0000000..98876c7
--- /dev/null
+++ b/asterixdb/asterix-app/data/tsv/01.tsv
@@ -0,0 +1,3 @@
+1              "good"  "recommend"
+2              "bad"   "not recommend"
+3              "good"  "recommend"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/tsv/02.tsv 
b/asterixdb/asterix-app/data/tsv/02.tsv
new file mode 100644
index 0000000..c01ce7c
--- /dev/null
+++ b/asterixdb/asterix-app/data/tsv/02.tsv
@@ -0,0 +1,3 @@
+4      2018    "good"  "recommend"
+5      2018            "not recommend"
+6      2018    "good"  "recommend"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/tsv/sample_01.tsv 
b/asterixdb/asterix-app/data/tsv/sample_01.tsv
new file mode 100644
index 0000000..aab289a
--- /dev/null
+++ b/asterixdb/asterix-app/data/tsv/sample_01.tsv
@@ -0,0 +1,28 @@
+11     55      text field wih , charrrrrrrrrrr true    90      0.666666667
+12     55      text field with " charrrrrrrrrr false   90      0.666666667
+14     55      text field with ' charrrrrrrrrr false   90      0.666666667
+15     55      text field with \ charrrrrrrrrr false   90      0.666666667
+16     55      text field wih \, char          true    90      0.666666667
+17     55      text field with \" charrrrrrrrr false   90      0.666666667
+18     55      text field with \' charrrrrrrrr false   90      0.666666667
+19     55      text field with \\ charrrrrrrrr false   90      0.666666667
+20     55      text field ending with  charr , false   90      0.666666667
+21     55      text field ending with  charr " false   90      0.666666667
+22     55      text field ending with  charr ' false   90      0.666666667
+23     55      text field ending with  charr \ false   90      0.666666667
+24     55      text field ending with charr \, false   90      0.666666667
+25     55      text field ending with charr \" false   90      0.666666667
+26     55      text field ending with charr \' false   90      0.666666667
+27     55      text field ending with charr \\ false   90      0.666666667
+28     55      ,text field starting with charr false   90      0.666666667
+29     55      "text field starting with charr false   90      0.666666667
+30     55      'text field starting with charr false   90      0.666666667
+31     55      \text field starting with charr false   90      0.666666667
+32     55      \,text field starting with char false   90      0.666666667
+33     55      \"text field starting with char false   90      0.666666667
+34     55      \'text field starting with char false   90      0.666666667
+35     55      \\text field starting with char false   90      0.666666667
+36     55      "text field inside   with char" false   90      0.666666667
+37     55        text field with charrrrrrrrr  false   90      0.666666667
+38     55      text field with "" charrrrrrrrr false   90      0.666666667
+39     55      text field "with" charrrrrrrrrr false   90      0.666666667
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
 
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index c1eea7c..e211531 100644
--- 
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ 
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -85,6 +85,7 @@ import org.apache.asterix.external.indexing.ExternalFile;
 import org.apache.asterix.external.indexing.IndexingConstants;
 import org.apache.asterix.external.operators.FeedIntakeOperatorNodePushable;
 import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.asterix.formats.nontagged.TypeTraitProvider;
 import org.apache.asterix.lang.common.base.IReturningStatement;
 import org.apache.asterix.lang.common.base.IRewriterFactory;
@@ -727,6 +728,7 @@ public class QueryTranslator extends AbstractLangTranslator 
implements IStatemen
                 case EXTERNAL:
                     ExternalDetailsDecl externalDetails = 
(ExternalDetailsDecl) dd.getDatasetDetailsDecl();
                     Map<String, String> properties = 
createExternalDatasetProperties(dd, metadataProvider, mdTxnCtx);
+                    ExternalDataUtils.defaultConfiguration(properties);
                     datasetDetails = new 
ExternalDatasetDetails(externalDetails.getAdapter(), properties, new Date(),
                             TransactionState.COMMIT);
                     break;
diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 3b4cdf8..d2158ba 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -18,9 +18,12 @@
  */
 package org.apache.asterix.test.external_dataset.aws;
 
+import static org.apache.hyracks.util.file.FileUtil.joinPath;
+
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
+import java.nio.file.Paths;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
@@ -66,9 +69,13 @@ public class AwsS3ExternalDatasetTest {
     private static S3Client client;
     private static final String S3_MOCK_SERVER_BUCKET = "playground";
     private static final String S3_MOCK_SERVER_BUCKET_DEFINITION = 
"json-data/reviews/"; // data resides here
+    private static final String S3_MOCK_SERVER_BUCKET_CSV_DEFINITION = 
"csv-data/reviews/"; // data resides here
+    private static final String S3_MOCK_SERVER_BUCKET_TSV_DEFINITION = 
"tsv-data/reviews/"; // data resides here
     private static final String S3_MOCK_SERVER_REGION = "us-west-2";
     private static final int S3_MOCK_SERVER_PORT = 8001;
     private static final String S3_MOCK_SERVER_HOSTNAME = "http://localhost:"; 
+ S3_MOCK_SERVER_PORT;
+    private static final String CSV_DATA_PATH = joinPath("data", "csv");
+    private static final String TSV_DATA_PATH = joinPath("data", "tsv");
 
     @BeforeClass
     public static void setUp() throws Exception {
@@ -210,6 +217,26 @@ public class AwsS3ExternalDatasetTest {
                 PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
                         .key(S3_MOCK_SERVER_BUCKET_DEFINITION + 
"2019/q2/2.json").build(),
                 RequestBody.fromString("{\"id\": 14, \"year\": 2019, 
\"quarter\": 2, \"review\": \"bad\"}"));
+
+        LOGGER.info("Adding CSV files to the bucket");
+        client.putObject(
+                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
+                        .key(S3_MOCK_SERVER_BUCKET_CSV_DEFINITION + 
"01.csv").build(),
+                RequestBody.fromFile(Paths.get(CSV_DATA_PATH, "01.csv")));
+        client.putObject(
+                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
+                        .key(S3_MOCK_SERVER_BUCKET_CSV_DEFINITION + 
"2018/01.csv").build(),
+                RequestBody.fromFile(Paths.get(CSV_DATA_PATH, "02.csv")));
+
+        LOGGER.info("Adding TSV files to the bucket");
+        client.putObject(
+                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
+                        .key(S3_MOCK_SERVER_BUCKET_TSV_DEFINITION + 
"01.tsv").build(),
+                RequestBody.fromFile(Paths.get(TSV_DATA_PATH, "01.tsv")));
+        client.putObject(
+                PutObjectRequest.builder().bucket(S3_MOCK_SERVER_BUCKET)
+                        .key(S3_MOCK_SERVER_BUCKET_TSV_DEFINITION + 
"2018/01.tsv").build(),
+                RequestBody.fromFile(Paths.get(TSV_DATA_PATH, "02.tsv")));
         LOGGER.info("Files added successfully");
     }
 }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
new file mode 100644
index 0000000..5728e78
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE TYPE t1 AS {f1: string, f2: string, f3: string, f4: string, f5: string};
+CREATE TYPE t2 AS {f1: string, f2: string, f3: string};
+CREATE TYPE t3 AS {f1: int?, f2: boolean, f3: string?};
+
+CREATE EXTERNAL DATASET ds1(t1) USING 
localfs(("path"="asterix_nc1://data/csv/sample_09.csv"), ("format"="csv"));
+CREATE EXTERNAL DATASET ds2(t2) USING 
localfs(("path"="asterix_nc1://data/csv/sample_10.csv"), ("format"="csv"));
+CREATE EXTERNAL DATASET ds3(t1) USING 
localfs(("path"="asterix_nc1://data/csv/sample_11.csv"), ("format"="csv"));
+CREATE EXTERNAL DATASET ds4(t3) USING 
localfs(("path"="asterix_nc1://data/csv/sample_12.csv"), ("format"="csv"));
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.2.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.2.query.sqlpp
new file mode 100644
index 0000000..d870372
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.2.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds1 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.3.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.3.query.sqlpp
new file mode 100644
index 0000000..64a2f8a
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.3.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds2 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.4.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.4.query.sqlpp
new file mode 100644
index 0000000..313198c
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.4.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds3 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.5.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.5.query.sqlpp
new file mode 100644
index 0000000..065de4e
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.5.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds4 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.6.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.6.ddl.sqlpp
new file mode 100644
index 0000000..86a1b59
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.6.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.1.ddl.sqlpp
new file mode 100644
index 0000000..c0faf16
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.1.ddl.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE TYPE t1 AS {f1: int, f2: int, f3: string, f4: boolean, f5: bigint, f6: 
double};
+
+CREATE EXTERNAL DATASET ds1(t1) USING 
localfs(("path"="asterix_nc1://data/tsv/sample_01.tsv"), ("format"="tsv"))
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.2.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.2.query.sqlpp
new file mode 100644
index 0000000..d870372
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.2.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds1 v SELECT VALUE v;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.3.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.3.ddl.sqlpp
new file mode 100644
index 0000000..86a1b59
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/tsv-parser-001/tsv-parser-002.3.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.000.ddl.sqlpp
new file mode 100644
index 0000000..b906039
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.000.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+DROP TYPE test IF EXISTS;
+CREATE TYPE test AS {id: int, year: int?, review: string, details: string?};
+
+DROP DATASET test IF EXISTS;
+CREATE EXTERNAL DATASET test(test) USING S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001";),
+("container"="playground"),
+("definition"="csv-data/reviews"),
+("format"="csv")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.002.query.sqlpp
new file mode 100644
index 0000000..6e31eb3
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.002.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM test SELECT VALUE test ORDER BY id ASC;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.003.ddl.sqlpp
new file mode 100644
index 0000000..0ff713d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/001/query-dataset.003.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATASET test IF EXISTS;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.000.ddl.sqlpp
new file mode 100644
index 0000000..d385bee
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.000.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+DROP TYPE test IF EXISTS;
+CREATE TYPE test AS {id: int, year: int?, review: string, details: string?};
+
+DROP DATASET test IF EXISTS;
+CREATE EXTERNAL DATASET test(test) USING S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001";),
+("container"="playground"),
+("definition"="tsv-data/reviews"),
+("format"="tsv")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.002.query.sqlpp
new file mode 100644
index 0000000..6e31eb3
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.002.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM test SELECT VALUE test ORDER BY id ASC;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.003.ddl.sqlpp
new file mode 100644
index 0000000..0ff713d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/002/query-dataset.003.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATASET test IF EXISTS;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.2.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.2.adm
new file mode 100644
index 0000000..5c84fb8
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.2.adm
@@ -0,0 +1,15 @@
+{ "f1": "a", "f2": "b", "f3": "c", "f4": "d", "f5": "e" }
+{ "f1": "0", "f2": ",     boo", "f3": " 1", "f4": "2", "f5": "3" }
+{ "f1": "1", "f2": "", "f3": "", "f4": "❤", "f5": "" }
+{ "f1": "2", "f2": "3", "f3": "4", "f4": "\\n", "f5": "" }
+{ "f1": "3", "f2": "quoted \"f\" field", "f3": "", "f4": "", "f5": "" }
+{ "f1": "4", "f2": "4", "f3": "", "f4": "", "f5": "" }
+{ "f1": "5", "f2": "{\"vehicle\": \"car\", \"location\": [2.0, 0.1]}", "f3": 
"", "f4": "", "f5": "" }
+{ "f1": "6", "f2": "2", "f3": "3", "f4": "", "f5": "" }
+{ "f1": "7", "f2": "8", "f3": "9", "f4": "", "f5": "" }
+{ "f1": "8", "f2": "2", "f3": "3", "f4": "", "f5": "" }
+{ "f1": "9", "f2": "8", "f3": "9", "f4": "", "f5": "" }
+{ "f1": "10", "f2": "field\n\"f\"\nwith multiple lines", "f3": "", "f4": "", 
"f5": "" }
+{ "f1": "11", "f2": "4", "f3": "", "f4": "", "f5": "" }
+{ "f1": "12", "f2": "5", "f3": "ʤ", "f4": "", "f5": "" }
+{ "f1": "John", "f2": "Green", "f3": "111 downtown st.", "f4": "city, state", 
"f5": "99999" }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.3.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.3.adm
new file mode 100644
index 0000000..80f5fb7
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.3.adm
@@ -0,0 +1,32 @@
+{ "f1": "1", "f2": "?/ Text ending with a backslash  / \\", "f3": "2000-09-03 
07:12:22" }
+{ "f1": "2", "f2": "non quoted text!yes......", "f3": "2003-08-09 22:34:19" }
+{ "f1": "3", "f2": "Text with more sentences. Another sentence.", "f3": 
"2003-09-12 05:29:12" }
+{ "f1": "4", "f2": "Quoted text.. yes.", "f3": "2003-09-13 17:21:49" }
+{ "f1": "5", "f2": "Another text", "f3": "2003-01-21 23:31:41" }
+{ "f1": "6", "f2": "Text with' quotes.", "f3": "2003-09-14 20:15:50" }
+{ "f1": "7", "f2": "Text with quote's", "f3": "2003-09-14 18:34:03" }
+{ "f1": "8", "f2": "Text with quotes '", "f3": "2003-01-28 20:32:13" }
+{ "f1": "9", "f2": "Text with quotes \"", "f3": "2003-01-18 11:44:15" }
+{ "f1": "10", "f2": "Text with question marks!?!?", "f3": "2003-09-18 
06:25:56" }
+{ "f1": "11", "f2": "\" Text that starts with quotes", "f3": "2003-09-12 
00:31:24" }
+{ "f1": "12", "f2": "Text with \\\" backslash and quotes", "f3": "2003-09-13 
20:30:06" }
+{ "f1": "13", "f2": "Text with \\\" backslash and quotes\\\"", "f3": 
"2003-09-14 16:20:36" }
+{ "f1": "14", "f2": "Text that has comma ,", "f3": "2003-09-12 08:21:18" }
+{ "f1": "15", "f2": "Text that has \",\" quoted comma", "f3": "2003-09-12 
08:21:18" }
+{ "f1": "16", "f2": ",Text that has ", "f3": "2003-09-12 08:21:18" }
+{ "f1": "17", "f2": ",\",Text that has ", "f3": "2003-09-12 08:21:18" }
+{ "f1": "18", "f2": "Text with commas,inside it., yes", "f3": "2003-09-13 
23:42:14" }
+{ "f1": "19", "f2": "Text that has \\n inside ", "f3": "2003-09-12 08:21:18" }
+{ "f1": "20", "f2": "Text that has \\\\\\n inside ", "f3": "2003-09-12 
08:21:18" }
+{ "f1": "21", "f2": "text with :)", "f3": "2003-09-05 19:15:34" }
+{ "f1": "22", "f2": "Text that has \\\\\\\" inside \\\\", "f3": "2003-09-12 
08:21:18" }
+{ "f1": "23", "f2": "Text that has \\\\\\\" inside \\\\\"", "f3": "2003-09-12 
08:21:18" }
+{ "f1": "24", "f2": "\"text that spans multiple\nLines and more\nLines ane 
more and more\nLines ...\nAnd yet more lines\nAnd more\"", "f3": "2011-09-19 
01:09:09" }
+{ "f1": "25", "f2": "Text \"\nmore lines", "f3": "2011-09-19 01:09:09" }
+{ "f1": "26", "f2": "\"\n", "f3": "2011-09-19 01:09:09" }
+{ "f1": "27", "f2": "Text", "f3": "" }
+{ "f1": "28", "f2": "Text", "f3": "2011-09-19 01:09:09" }
+{ "f1": "29", "f2": "Text\\.", "f3": "2011-09-19 01:09:09" }
+{ "f1": "30", "f2": "Text\\.", "f3": "2011-09-19 01:09:09" }
+{ "f1": "31", "f2": "\\.Text", "f3": "2011-09-19 01:09:09" }
+{ "f1": "32", "f2": "\\.Text", "f3": "2011-09-19 01:09:09" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.4.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.4.adm
new file mode 100644
index 0000000..5c61b4a
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.4.adm
@@ -0,0 +1,4 @@
+{ "f1": "1", "f2": ",\", b", "f3": " 3", "f4": "4", "f5": "5" }
+{ "f1": ",\", b", "f2": "4", "f3": " 3", "f4": "4", "f5": "5" }
+{ "f1": "", "f2": "", "f3": "", "f4": "", "f5": "" }
+{ "f1": "dd", "f2": "", "f3": "", "f4": "", "f5": "" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.5.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.5.adm
new file mode 100644
index 0000000..4b80e26
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.5.adm
@@ -0,0 +1,13 @@
+{ "f1": 1, "f2": true, "f3": "text" }
+{ "f1": 2, "f2": false, "f3": "text" }
+{ "f1": 3, "f2": true, "f3": "text" }
+{ "f1": 4, "f2": true, "f3": null }
+{ "f1": 5, "f2": false, "f3": null }
+{ "f1": 6, "f2": true, "f3": "text\"\nmore lines" }
+{ "f1": 7, "f2": false, "f3": "\"\n" }
+{ "f1": 8, "f2": true, "f3": null }
+{ "f1": 9, "f2": false, "f3": "text\"" }
+{ "f1": 10, "f2": false, "f3": "text\\." }
+{ "f1": 11, "f2": true, "f3": "text\\." }
+{ "f1": null, "f2": false, "f3": "\\.text" }
+{ "f1": 13, "f2": true, "f3": "\\.text" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/tsv-parser-001/tsv-parser-001.2.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/tsv-parser-001/tsv-parser-001.2.adm
new file mode 100644
index 0000000..fbe287b
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/tsv-parser-001/tsv-parser-001.2.adm
@@ -0,0 +1,28 @@
+{ "f1": 11, "f2": 55, "f3": "text field wih , charrrrrrrrrrr", "f4": true, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 12, "f2": 55, "f3": "text field with \" charrrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 14, "f2": 55, "f3": "text field with ' charrrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 15, "f2": 55, "f3": "text field with \\ charrrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 16, "f2": 55, "f3": "text field wih \\, char         ", "f4": true, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 17, "f2": 55, "f3": "text field with \\\" charrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 18, "f2": 55, "f3": "text field with \\' charrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 19, "f2": 55, "f3": "text field with \\\\ charrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 20, "f2": 55, "f3": "text field ending with  charr ,", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 21, "f2": 55, "f3": "text field ending with  charr \"", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 22, "f2": 55, "f3": "text field ending with  charr '", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 23, "f2": 55, "f3": "text field ending with  charr \\", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 24, "f2": 55, "f3": "text field ending with charr \\,", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 25, "f2": 55, "f3": "text field ending with charr \\\"", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 26, "f2": 55, "f3": "text field ending with charr \\'", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 27, "f2": 55, "f3": "text field ending with charr \\\\", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 28, "f2": 55, "f3": ",text field starting with charr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 29, "f2": 55, "f3": "\"text field starting with charr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 30, "f2": 55, "f3": "'text field starting with charr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 31, "f2": 55, "f3": "\\text field starting with charr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 32, "f2": 55, "f3": "\\,text field starting with char", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 33, "f2": 55, "f3": "\\\"text field starting with char", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 34, "f2": 55, "f3": "\\'text field starting with char", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 35, "f2": 55, "f3": "\\\\text field starting with char", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 36, "f2": 55, "f3": "\"text field inside   with char\"", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 37, "f2": 55, "f3": "  text field with charrrrrrrrr ", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 38, "f2": 55, "f3": "text field with \"\" charrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
+{ "f1": 39, "f2": 55, "f3": "text field \"with\" charrrrrrrrrr", "f4": false, 
"f5": 90, "f6": 0.666666667 }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/001/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/001/external_dataset.001.adm
new file mode 100644
index 0000000..93d1b57
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/001/external_dataset.001.adm
@@ -0,0 +1,6 @@
+{ "id": 1, "year": null, "review": "good", "details": "recommend" }
+{ "id": 2, "year": null, "review": "bad", "details": "not recommend" }
+{ "id": 3, "year": null, "review": "good", "details": null }
+{ "id": 4, "year": 2018, "review": "good", "details": "recommend" }
+{ "id": 5, "year": 2018, "review": "", "details": "not recommend" }
+{ "id": 6, "year": 2018, "review": "good", "details": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/002/external_dataset.001.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/002/external_dataset.001.adm
new file mode 100644
index 0000000..1954b05
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/002/external_dataset.001.adm
@@ -0,0 +1,6 @@
+{ "id": 1, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 2, "year": null, "review": "\"bad\"", "details": "\"not recommend\"" }
+{ "id": 3, "year": null, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 4, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
+{ "id": 5, "year": 2018, "review": "", "details": "\"not recommend\"" }
+{ "id": 6, "year": 2018, "review": "\"good\"", "details": "\"recommend\"" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
index cd1fb12..9948209 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
@@ -24,5 +24,15 @@
         <output-dir compare="Text">aws/s3/000</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/001">
+        <output-dir compare="Text">aws/s3/001</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/002">
+        <output-dir compare="Text">aws/s3/002</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
 </test-suite>
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 63db153..a578690 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -12249,6 +12249,18 @@
       </compilation-unit>
     </test-case>
   </test-group>
+  <test-group name="csv-tsv-parser">
+    <test-case FilePath="csv-tsv-parser">
+      <compilation-unit name="csv-parser-001">
+        <output-dir compare="Text">csv-parser-001</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="csv-tsv-parser">
+      <compilation-unit name="tsv-parser-001">
+        <output-dir compare="Text">tsv-parser-001</output-dir>
+      </compilation-unit>
+    </test-case>
+  </test-group>
   <test-group name="binary">
     <test-case FilePath="binary">
       <compilation-unit name="parse">
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java
index 8255ebb..5c8f219 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java
@@ -41,7 +41,7 @@ public class CSVToRecordWithMetadataAndPKConverter
         this.cursor = new FieldCursorForDelimitedDataParser(null, delimiter, 
ExternalDataConstants.QUOTE);
         this.record = new CharArrayRecord();
         this.valueIndex = valueIndex;
-        this.recordWithMetadata = new RecordWithMetadataAndPK<char[]>(record, 
metaType.getFieldTypes(), recordType,
+        this.recordWithMetadata = new RecordWithMetadataAndPK<>(record, 
metaType.getFieldTypes(), recordType,
                 keyIndicator, keyIndexes, keyTypes);
     }
 
@@ -53,16 +53,15 @@ public class CSVToRecordWithMetadataAndPKConverter
         int i = 0;
         int j = 0;
         while (cursor.nextField()) {
-            if (cursor.isDoubleQuoteIncludedInThisField) {
-                cursor.eliminateDoubleQuote(cursor.buffer, cursor.fStart, 
cursor.fEnd - cursor.fStart);
-                cursor.fEnd -= cursor.doubleQuoteCount;
-                cursor.isDoubleQuoteIncludedInThisField = false;
+            if (cursor.fieldHasDoubleQuote()) {
+                cursor.eliminateDoubleQuote();
             }
             if (i == valueIndex) {
-                record.setValue(cursor.buffer, cursor.fStart, cursor.fEnd - 
cursor.fStart);
+                record.setValue(cursor.getBuffer(), cursor.getFieldStart(), 
cursor.getFieldLength());
                 record.endRecord();
             } else {
-                recordWithMetadata.setRawMetadata(j, cursor.buffer, 
cursor.fStart, cursor.fEnd - cursor.fStart);
+                recordWithMetadata.setRawMetadata(j, cursor.getBuffer(), 
cursor.getFieldStart(),
+                        cursor.getFieldLength());
                 j++;
             }
             i++;
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index a9f7898..6b8bb59 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.asterix.common.dataflow.ICcApplicationContext;
+import org.apache.asterix.common.exceptions.AsterixException;
 import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.api.IInputStreamFactory;
@@ -35,7 +36,6 @@ import 
org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartit
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.api.application.IServiceContext;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
 
 import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
 import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
@@ -77,8 +77,7 @@ public class AwsS3InputStreamFactory implements 
IInputStreamFactory {
     }
 
     @Override
-    public void configure(IServiceContext ctx, Map<String, String> 
configuration)
-            throws AlgebricksException, HyracksDataException {
+    public void configure(IServiceContext ctx, Map<String, String> 
configuration) throws AlgebricksException {
         this.configuration = configuration;
         ICcApplicationContext ccApplicationContext = (ICcApplicationContext) 
ctx.getApplicationContext();
 
@@ -115,13 +114,13 @@ public class AwsS3InputStreamFactory implements 
IInputStreamFactory {
      *
      * @return A list of string paths that point to files only
      *
-     * @throws HyracksDataException HyracksDataException
+     * @throws AsterixException AsterixException
      */
-    private List<S3Object> getFilesOnly(List<S3Object> s3Objects, String 
fileFormat) throws HyracksDataException {
+    private List<S3Object> getFilesOnly(List<S3Object> s3Objects, String 
fileFormat) throws AsterixException {
         List<S3Object> filesOnly = new ArrayList<>();
         String fileExtension = getFileExtension(fileFormat);
         if (fileExtension == null) {
-            throw HyracksDataException.create(ErrorCode.INVALID_FORMAT);
+            throw 
AsterixException.create(ErrorCode.PROVIDER_STREAM_RECORD_READER_UNKNOWN_FORMAT, 
fileFormat);
         }
 
         s3Objects.stream().filter(object -> 
object.key().endsWith(fileExtension)).forEach(filesOnly::add);
@@ -214,8 +213,12 @@ public class AwsS3InputStreamFactory implements 
IInputStreamFactory {
      */
     private String getFileExtension(String format) {
         switch (format.toLowerCase()) {
-            case "json":
+            case ExternalDataConstants.FORMAT_JSON_LOWER_CASE:
                 return ".json";
+            case ExternalDataConstants.FORMAT_CSV:
+                return ".csv";
+            case ExternalDataConstants.FORMAT_TSV:
+                return ".tsv";
             default:
                 return null;
         }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
index 0b41d4b..be600ed 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
@@ -36,8 +36,9 @@ public class LineRecordReader extends StreamRecordReader {
     protected int newlineLength;
     protected int recordNumber = 0;
     protected boolean nextIsHeader = false;
-    private static final List<String> recordReaderFormats = 
Collections.unmodifiableList(
-            Arrays.asList(ExternalDataConstants.FORMAT_DELIMITED_TEXT, 
ExternalDataConstants.FORMAT_CSV));
+    private static final List<String> recordReaderFormats =
+            
Collections.unmodifiableList(Arrays.asList(ExternalDataConstants.FORMAT_DELIMITED_TEXT,
+                    ExternalDataConstants.FORMAT_CSV, 
ExternalDataConstants.FORMAT_TSV));
     private static final String REQUIRED_CONFIGS = "";
 
     @Override
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 4c4128a..1fd328b 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -32,11 +32,10 @@ import 
org.apache.hyracks.api.exceptions.HyracksDataException;
 public class QuotedLineRecordReader extends LineRecordReader {
 
     private char quote;
-    private boolean prevCharEscape;
-    private boolean inQuote;
+    private char quoteEscape;
     private static final List<String> recordReaderFormats = 
Collections.unmodifiableList(
             Arrays.asList(ExternalDataConstants.FORMAT_DELIMITED_TEXT, 
ExternalDataConstants.FORMAT_CSV));
-    private static final String REQUIRED_CONFIGS = "quote";
+    private static final String REQUIRED_CONFIGS = 
ExternalDataConstants.KEY_QUOTE;
 
     @Override
     public void configure(AsterixInputStream inputStream, Map<String, String> 
config) throws HyracksDataException {
@@ -47,6 +46,17 @@ public class QuotedLineRecordReader extends LineRecordReader 
{
                     ExternalDataConstants.PARAMETER_OF_SIZE_ONE, quoteString));
         }
         this.quote = quoteString.charAt(0);
+        String escapeString = 
config.get(ExternalDataConstants.KEY_QUOTE_ESCAPE);
+        if (escapeString == null) {
+            quoteEscape = ExternalDataConstants.ESCAPE;
+        } else {
+            if (escapeString.length() != 1) {
+                throw new HyracksDataException(
+                        
ExceptionUtils.incorrectParameterMessage(ExternalDataConstants.KEY_QUOTE_ESCAPE,
+                                ExternalDataConstants.PARAMETER_OF_SIZE_ONE, 
escapeString));
+            }
+            quoteEscape = escapeString.charAt(0);
+        }
     }
 
     @Override
@@ -67,31 +77,35 @@ public class QuotedLineRecordReader extends 
LineRecordReader {
             }
             newlineLength = 0;
             prevCharCR = false;
-            prevCharEscape = false;
+            boolean prevCharEscape = false;
             record.reset();
             int readLength = 0;
-            inQuote = false;
+            boolean inQuote = false;
             do {
                 int startPosn = bufferPosn;
                 if (bufferPosn >= bufferLength) {
                     startPosn = bufferPosn = 0;
                     bufferLength = reader.read(inputBuffer);
                     if (bufferLength <= 0) {
-                        {
-                            if (readLength > 0) {
-                                if (inQuote) {
-                                    throw new IOException("malformed input 
record ended inside quote");
-                                }
-                                record.endRecord();
-                                recordNumber++;
-                                return true;
+                        if (readLength > 0) {
+                            if (inQuote) {
+                                throw new IOException("malformed input record 
ended inside quote");
                             }
-                            close();
-                            return false;
+                            record.endRecord();
+                            recordNumber++;
+                            return true;
                         }
+                        close();
+                        return false;
                     }
                 }
+                boolean maybeInQuote = false;
                 for (; bufferPosn < bufferLength; ++bufferPosn) {
+                    if (inputBuffer[bufferPosn] == quote && quoteEscape == 
quote) {
+                        inQuote |= maybeInQuote;
+                        prevCharEscape |= maybeInQuote;
+                    }
+                    maybeInQuote = false;
                     if (!inQuote) {
                         if (inputBuffer[bufferPosn] == 
ExternalDataConstants.LF) {
                             newlineLength = (prevCharCR) ? 2 : 1;
@@ -103,24 +117,25 @@ public class QuotedLineRecordReader extends 
LineRecordReader {
                             break;
                         }
                         prevCharCR = (inputBuffer[bufferPosn] == 
ExternalDataConstants.CR);
-                        if (inputBuffer[bufferPosn] == quote) {
-                            if (!prevCharEscape) {
-                                inQuote = true;
-                            }
+                        if (inputBuffer[bufferPosn] == quote && 
!prevCharEscape) {
+                            // this is an opening quote
+                            inQuote = true;
                         }
                         if (prevCharEscape) {
                             prevCharEscape = false;
                         } else {
-                            prevCharEscape = inputBuffer[bufferPosn] == 
ExternalDataConstants.ESCAPE;
+                            // the quoteEscape != quote is for making an 
opening quote not an escape
+                            prevCharEscape = inputBuffer[bufferPosn] == 
quoteEscape && quoteEscape != quote;
                         }
                     } else {
-                        // only look for next quote
-                        if (inputBuffer[bufferPosn] == quote) {
-                            if (!prevCharEscape) {
-                                inQuote = false;
-                            }
+                        // if quote == quoteEscape and current char is quote, 
then it could be closing or escaping
+                        if (inputBuffer[bufferPosn] == quote && 
!prevCharEscape) {
+                            // this is most likely a closing quote. the 
outcome depends on the next char
+                            inQuote = false;
+                            maybeInQuote = true;
                         }
-                        prevCharEscape = inputBuffer[bufferPosn] == 
ExternalDataConstants.ESCAPE;
+                        prevCharEscape =
+                                inputBuffer[bufferPosn] == quoteEscape && 
!prevCharEscape && quoteEscape != quote;
                     }
                 }
                 readLength = bufferPosn - startPosn;
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index 4e371c8..8facce6 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -57,9 +57,9 @@ public class DelimitedDataParser extends AbstractDataParser 
implements IStreamDa
     private ArrayBackedValueStorage[] nameBuffers;
     private boolean areAllNullFields;
 
-    public DelimitedDataParser(IValueParserFactory[] valueParserFactories, 
char fieldDelimter, char quote,
+    public DelimitedDataParser(IValueParserFactory[] valueParserFactories, 
char fieldDelimiter, char quote,
             boolean hasHeader, ARecordType recordType, boolean isStreamParser) 
throws HyracksDataException {
-        this.fieldDelimiter = fieldDelimter;
+        this.fieldDelimiter = fieldDelimiter;
         this.quote = quote;
         this.hasHeader = hasHeader;
         this.recordType = recordType;
@@ -98,7 +98,7 @@ public class DelimitedDataParser extends AbstractDataParser 
implements IStreamDa
             }
         }
         if (!isStreamParser) {
-            cursor = new FieldCursorForDelimitedDataParser(null, 
fieldDelimiter, quote);
+            cursor = new FieldCursorForDelimitedDataParser(null, 
this.fieldDelimiter, quote);
         }
     }
 
@@ -134,25 +134,23 @@ public class DelimitedDataParser extends 
AbstractDataParser implements IStreamDa
             fieldValueBuffer.reset();
 
             try {
-                if (cursor.fStart == cursor.fEnd && 
recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
+                if (cursor.isFieldEmpty() && 
recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
                         && recordType.getFieldTypes()[i].getTypeTag() != 
ATypeTag.NULL) {
                     // if the field is empty and the type is optional, insert
                     // NULL. Note that string type can also process empty 
field as an
                     // empty string
                     if 
(!NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i])) {
-                        throw new 
RuntimeDataException(ErrorCode.PARSER_DELIMITED_NONOPTIONAL_NULL, 
cursor.recordCount,
-                                cursor.fieldCount);
+                        throw new 
RuntimeDataException(ErrorCode.PARSER_DELIMITED_NONOPTIONAL_NULL,
+                                cursor.getRecordCount(), 
cursor.getFieldCount());
                     }
                     
fieldValueBufferOutput.writeByte(ATypeTag.SERIALIZED_NULL_TYPE_TAG);
                 } else {
                     fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
-                    // Eliminate doule quotes in the field that we are going 
to parse
-                    if (cursor.isDoubleQuoteIncludedInThisField) {
-                        cursor.eliminateDoubleQuote(cursor.buffer, 
cursor.fStart, cursor.fEnd - cursor.fStart);
-                        cursor.fEnd -= cursor.doubleQuoteCount;
-                        cursor.isDoubleQuoteIncludedInThisField = false;
+                    // Eliminate double quotes in the field that we are going 
to parse
+                    if (cursor.fieldHasDoubleQuote()) {
+                        cursor.eliminateDoubleQuote();
                     }
-                    valueParsers[i].parse(cursor.buffer, cursor.fStart, 
cursor.fEnd - cursor.fStart,
+                    valueParsers[i].parse(cursor.getBuffer(), 
cursor.getFieldStart(), cursor.getFieldLength(),
                             fieldValueBufferOutput);
                     areAllNullFields = false;
                 }
@@ -165,15 +163,14 @@ public class DelimitedDataParser extends 
AbstractDataParser implements IStreamDa
                 throw HyracksDataException.create(e);
             }
         }
+        if (valueParsers.length != cursor.getFieldCount()) {
+            throw new HyracksDataException("Record #" + 
cursor.getRecordCount() + " is missing some fields");
+        }
     }
 
     @Override
     public void parse(IRawRecord<? extends char[]> record, DataOutput out) 
throws HyracksDataException {
-        try {
-            cursor.nextRecord(record.get(), record.size());
-        } catch (IOException e) {
-            throw HyracksDataException.create(e);
-        }
+        cursor.nextRecord(record.get(), record.size());
         parseRecord();
         if (!areAllNullFields) {
             recBuilder.write(out, true);
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index f406729..1fee49f 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -21,10 +21,7 @@ package org.apache.asterix.external.parser.factory;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 
-import org.apache.asterix.common.exceptions.ErrorCode;
-import org.apache.asterix.common.exceptions.RuntimeDataException;
 import 
org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
 import org.apache.asterix.external.api.IRecordDataParser;
 import org.apache.asterix.external.api.IStreamDataParser;
@@ -40,7 +37,8 @@ public class DelimitedDataParserFactory extends 
AbstractRecordStreamParserFactor
 
     private static final long serialVersionUID = 1L;
     private static final List<String> parserFormats =
-            Collections.unmodifiableList(Arrays.asList("csv", 
"delimited-text"));
+            
Collections.unmodifiableList(Arrays.asList(ExternalDataConstants.FORMAT_CSV,
+                    ExternalDataConstants.FORMAT_DELIMITED_TEXT, 
ExternalDataConstants.FORMAT_TSV));
 
     @Override
     public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext 
ctx) throws HyracksDataException {
@@ -49,8 +47,8 @@ public class DelimitedDataParserFactory extends 
AbstractRecordStreamParserFactor
 
     private DelimitedDataParser createParser() throws HyracksDataException {
         IValueParserFactory[] valueParserFactories = 
ExternalDataUtils.getValueParserFactories(recordType);
-        Character delimiter = 
DelimitedDataParserFactory.getDelimiter(configuration);
-        char quote = DelimitedDataParserFactory.getQuote(configuration, 
delimiter);
+        char delimiter = ExternalDataUtils.getDelimiter(configuration);
+        char quote = ExternalDataUtils.getQuote(configuration, delimiter);
         boolean hasHeader = ExternalDataUtils.hasHeader(configuration);
         return new DelimitedDataParser(valueParserFactories, delimiter, quote, 
hasHeader, recordType,
                 
ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM));
@@ -67,40 +65,6 @@ public class DelimitedDataParserFactory extends 
AbstractRecordStreamParserFactor
         return createParser();
     }
 
-    // Get a delimiter from the given configuration
-    public static char getDelimiter(Map<String, String> configuration) throws 
HyracksDataException {
-        String delimiterValue = 
configuration.get(ExternalDataConstants.KEY_DELIMITER);
-        if (delimiterValue == null) {
-            delimiterValue = ExternalDataConstants.DEFAULT_DELIMITER;
-        } else if (delimiterValue.length() != 1) {
-            throw new 
RuntimeDataException(ErrorCode.PARSER_FACTORY_DELIMITED_DATA_PARSER_FACTORY_NOT_VALID_DELIMITER,
-                    delimiterValue);
-        }
-        return delimiterValue.charAt(0);
-    }
-
-    // Get a quote from the given configuration when the delimiter is given
-    // Need to pass delimiter to check whether they share the same character
-    public static char getQuote(Map<String, String> configuration, char 
delimiter) throws HyracksDataException {
-        String quoteValue = configuration.get(ExternalDataConstants.KEY_QUOTE);
-        if (quoteValue == null) {
-            quoteValue = ExternalDataConstants.DEFAULT_QUOTE;
-        } else if (quoteValue.length() != 1) {
-            throw new 
RuntimeDataException(ErrorCode.PARSER_FACTORY_DELIMITED_DATA_PARSER_FACTORY_NOT_VALID_QUOTE,
-                    quoteValue);
-        }
-
-        // Since delimiter (char type value) can't be null,
-        // we only check whether delimiter and quote use the same character
-        if (quoteValue.charAt(0) == delimiter) {
-            throw new RuntimeDataException(
-                    
ErrorCode.PARSER_FACTORY_DELIMITED_DATA_PARSER_FACTORY_QUOTE_DELIMITER_MISMATCH,
 quoteValue,
-                    delimiter);
-        }
-
-        return quoteValue.charAt(0);
-    }
-
     @Override
     public void setMetaType(ARecordType metaType) {
     }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java
index 414c460..27ac10e 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java
@@ -27,7 +27,7 @@ import 
org.apache.asterix.external.adapter.factory.LookupAdapterFactory;
 import org.apache.asterix.external.api.IIndexingAdapterFactory;
 import org.apache.asterix.external.api.ITypedAdapterFactory;
 import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.util.ExternalDataCompatibilityUtils;
+import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.api.application.IServiceContext;
@@ -39,11 +39,15 @@ import 
org.apache.hyracks.api.exceptions.HyracksDataException;
  */
 public class AdapterFactoryProvider {
 
-    // Adapters
+    private AdapterFactoryProvider() {
+    }
+
+    // get adapter factory. this method has the side effect of modifying the 
configuration as necessary
     public static ITypedAdapterFactory getAdapterFactory(IServiceContext 
serviceCtx, String adapterName,
             Map<String, String> configuration, ARecordType itemType, 
ARecordType metaType)
             throws HyracksDataException, AlgebricksException {
-        ExternalDataCompatibilityUtils.prepare(adapterName, configuration);
+        ExternalDataUtils.defaultConfiguration(configuration);
+        ExternalDataUtils.prepare(adapterName, configuration);
         ICcApplicationContext context = (ICcApplicationContext) 
serviceCtx.getApplicationContext();
         ITypedAdapterFactory adapterFactory =
                 (ITypedAdapterFactory) 
context.getAdapterFactoryService().createAdapterFactory();
@@ -53,11 +57,12 @@ public class AdapterFactoryProvider {
         return adapterFactory;
     }
 
-    // Indexing Adapters
+    // get indexing adapter factory. this method has the side effect of 
modifying the configuration as necessary
     public static IIndexingAdapterFactory 
getIndexingAdapterFactory(IServiceContext serviceCtx, String adapterName,
             Map<String, String> configuration, ARecordType itemType, 
List<ExternalFile> snapshot, boolean indexingOp,
             ARecordType metaType) throws HyracksDataException, 
AlgebricksException {
-        ExternalDataCompatibilityUtils.prepare(adapterName, configuration);
+        ExternalDataUtils.defaultConfiguration(configuration);
+        ExternalDataUtils.prepare(adapterName, configuration);
         GenericAdapterFactory adapterFactory = new GenericAdapterFactory();
         adapterFactory.setOutputType(itemType);
         adapterFactory.setMetaType(metaType);
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java
index e222e99..8181262 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java
@@ -18,8 +18,6 @@
  */
 package org.apache.asterix.external.util;
 
-import java.util.Map;
-
 import org.apache.asterix.common.exceptions.AsterixException;
 import org.apache.asterix.external.api.IDataParserFactory;
 import org.apache.asterix.external.api.IExternalDataSourceFactory;
@@ -30,6 +28,9 @@ import 
org.apache.asterix.external.input.record.converter.IRecordConverterFactor
 
 public class ExternalDataCompatibilityUtils {
 
+    private ExternalDataCompatibilityUtils() {
+    }
+
     public static void validateCompatibility(IExternalDataSourceFactory 
dataSourceFactory,
             IDataParserFactory dataParserFactory) throws AsterixException {
         if (dataSourceFactory.getDataSourceType() != 
dataParserFactory.getDataSourceType()) {
@@ -58,16 +59,4 @@ public class ExternalDataCompatibilityUtils {
                             + recordParserFactory.getRecordClass());
         }
     }
-
-    public static void prepare(String adapterName, Map<String, String> 
configuration) {
-        if (!configuration.containsKey(ExternalDataConstants.KEY_READER)) {
-            configuration.put(ExternalDataConstants.KEY_READER, adapterName);
-        }
-        if (!configuration.containsKey(ExternalDataConstants.KEY_PARSER)) {
-            if (configuration.containsKey(ExternalDataConstants.KEY_FORMAT)) {
-                configuration.put(ExternalDataConstants.KEY_PARSER,
-                        configuration.get(ExternalDataConstants.KEY_FORMAT));
-            }
-        }
-    }
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 26f5402..5427bc5 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -19,6 +19,10 @@
 package org.apache.asterix.external.util;
 
 public class ExternalDataConstants {
+
+    private ExternalDataConstants() {
+    }
+
     // TODO: Remove unused variables.
     /**
      * Keys
@@ -62,6 +66,7 @@ public class ExternalDataConstants {
     public static final String KEY_LOCAL_SOCKET_PATH = "local-socket-path";
     public static final String KEY_FORMAT = "format";
     public static final String KEY_QUOTE = "quote";
+    public static final String KEY_QUOTE_ESCAPE = "quote-escape";
     public static final String KEY_PARSER = "parser";
     public static final String KEY_DATASET_RECORD = "dataset-record";
     public static final String KEY_HIVE_SERDE = "hive-serde";
@@ -188,6 +193,8 @@ public class ExternalDataConstants {
      */
     public static final String TRUE = "true";
     public static final String FALSE = "false";
+    public static final String TAB_STR = "\t";
+    public static final String NULL_STR = "\0";
 
     /**
      * Constant characters
@@ -228,6 +235,7 @@ public class ExternalDataConstants {
     public static final String KEY_READER_FACTORY = "reader-factory";
     public static final String READER_RSS = "rss_feed";
     public static final String FORMAT_CSV = "csv";
+    public static final String FORMAT_TSV = "tsv";
 
     public static final String ERROR_PARSE_RECORD = "Parser failed to parse 
record";
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index a418cbf..443aa7e 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -34,6 +34,7 @@ import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.AUnionType;
 import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.parsers.BooleanParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.DoubleParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.FloatParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
@@ -43,48 +44,43 @@ import 
org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
 
 public class ExternalDataUtils {
 
+    private ExternalDataUtils() {
+    }
+
     // Get a delimiter from the given configuration
-    public static char getDelimiter(Map<String, String> configuration) throws 
AsterixException {
+    public static char getDelimiter(Map<String, String> configuration) throws 
HyracksDataException {
         String delimiterValue = 
configuration.get(ExternalDataConstants.KEY_DELIMITER);
         if (delimiterValue == null) {
             delimiterValue = ExternalDataConstants.DEFAULT_DELIMITER;
         } else if (delimiterValue.length() != 1) {
-            throw new AsterixException(
-                    "'" + delimiterValue + "' is not a valid delimiter. The 
length of a delimiter should be 1.");
+            throw new 
RuntimeDataException(ErrorCode.PARSER_FACTORY_DELIMITED_DATA_PARSER_FACTORY_NOT_VALID_DELIMITER,
+                    delimiterValue);
         }
         return delimiterValue.charAt(0);
     }
 
     // Get a quote from the given configuration when the delimiter is given
     // Need to pass delimiter to check whether they share the same character
-    public static char getQuote(Map<String, String> configuration, char 
delimiter) throws AsterixException {
+    public static char getQuote(Map<String, String> configuration, char 
delimiter) throws HyracksDataException {
         String quoteValue = configuration.get(ExternalDataConstants.KEY_QUOTE);
         if (quoteValue == null) {
             quoteValue = ExternalDataConstants.DEFAULT_QUOTE;
         } else if (quoteValue.length() != 1) {
-            throw new AsterixException("'" + quoteValue + "' is not a valid 
quote. The length of a quote should be 1.");
+            throw new 
RuntimeDataException(ErrorCode.PARSER_FACTORY_DELIMITED_DATA_PARSER_FACTORY_NOT_VALID_QUOTE,
+                    quoteValue);
         }
 
         // Since delimiter (char type value) can't be null,
         // we only check whether delimiter and quote use the same character
         if (quoteValue.charAt(0) == delimiter) {
-            throw new AsterixException(
-                    "Quote '" + quoteValue + "' cannot be used with the 
delimiter '" + delimiter + "'. ");
+            throw new RuntimeDataException(
+                    
ErrorCode.PARSER_FACTORY_DELIMITED_DATA_PARSER_FACTORY_QUOTE_DELIMITER_MISMATCH,
 quoteValue,
+                    delimiter);
         }
 
         return quoteValue.charAt(0);
     }
 
-    // Get the header flag
-    public static boolean getHasHeader(Map<String, String> configuration) {
-        return 
Boolean.parseBoolean(configuration.get(ExternalDataConstants.KEY_HEADER));
-    }
-
-    public static void validateParameters(Map<String, String> configuration) 
throws AsterixException {
-        validateDataSourceParameters(configuration);
-        validateDataParserParameters(configuration);
-    }
-
     public static void validateDataParserParameters(Map<String, String> 
configuration) throws AsterixException {
         String parser = configuration.get(ExternalDataConstants.KEY_FORMAT);
         if (parser == null) {
@@ -150,15 +146,6 @@ public class ExternalDataUtils {
         return parserFormat != null ? parserFormat : 
configuration.get(ExternalDataConstants.KEY_FORMAT);
     }
 
-    public static void setRecordFormat(Map<String, String> configuration, 
String format) {
-        if (!configuration.containsKey(ExternalDataConstants.KEY_DATA_PARSER)) 
{
-            configuration.put(ExternalDataConstants.KEY_DATA_PARSER, format);
-        }
-        if (!configuration.containsKey(ExternalDataConstants.KEY_FORMAT)) {
-            configuration.put(ExternalDataConstants.KEY_FORMAT, format);
-        }
-    }
-
     private static Map<ATypeTag, IValueParserFactory> valueParserFactoryMap = 
initializeValueParserFactoryMap();
 
     private static Map<ATypeTag, IValueParserFactory> 
initializeValueParserFactoryMap() {
@@ -168,6 +155,7 @@ public class ExternalDataUtils {
         m.put(ATypeTag.DOUBLE, DoubleParserFactory.INSTANCE);
         m.put(ATypeTag.BIGINT, LongParserFactory.INSTANCE);
         m.put(ATypeTag.STRING, UTF8StringParserFactory.INSTANCE);
+        m.put(ATypeTag.BOOLEAN, BooleanParserFactory.INSTANCE);
         return m;
     }
 
@@ -201,10 +189,6 @@ public class ExternalDataUtils {
         return vpf;
     }
 
-    public static String getRecordReaderStreamName(Map<String, String> 
configuration) {
-        return configuration.get(ExternalDataConstants.KEY_READER_STREAM);
-    }
-
     public static boolean hasHeader(Map<String, String> configuration) {
         String value = configuration.get(ExternalDataConstants.KEY_HEADER);
         if (value != null) {
@@ -281,12 +265,6 @@ public class ExternalDataUtils {
         return configuration.get(ExternalDataConstants.KEY_FEED_NAME);
     }
 
-    public static int getQueueSize(Map<String, String> configuration) {
-        return configuration.containsKey(ExternalDataConstants.KEY_QUEUE_SIZE)
-                ? 
Integer.parseInt(configuration.get(ExternalDataConstants.KEY_QUEUE_SIZE))
-                : ExternalDataConstants.DEFAULT_QUEUE_SIZE;
-    }
-
     public static boolean isRecordWithMeta(Map<String, String> configuration) {
         return 
configuration.containsKey(ExternalDataConstants.KEY_META_TYPE_NAME);
     }
@@ -339,4 +317,42 @@ public class ExternalDataUtils {
         }
         return intIndicators;
     }
+
+    /**
+     * Fills the configuration of the external dataset and its adapter with 
default values if not provided by user.
+     *
+     * @param configuration external data configuration
+     */
+    public static void defaultConfiguration(Map<String, String> configuration) 
{
+        String format = configuration.get(ExternalDataConstants.KEY_FORMAT);
+        if (format != null) {
+            // default quote, escape character for quote and fields delimiter 
for csv and tsv format
+            if (format.equals(ExternalDataConstants.FORMAT_CSV)) {
+                configuration.putIfAbsent(ExternalDataConstants.KEY_DELIMITER, 
ExternalDataConstants.DEFAULT_DELIMITER);
+                configuration.putIfAbsent(ExternalDataConstants.KEY_QUOTE, 
ExternalDataConstants.DEFAULT_QUOTE);
+                
configuration.putIfAbsent(ExternalDataConstants.KEY_QUOTE_ESCAPE, 
ExternalDataConstants.DEFAULT_QUOTE);
+            } else if (format.equals(ExternalDataConstants.FORMAT_TSV)) {
+                configuration.putIfAbsent(ExternalDataConstants.KEY_DELIMITER, 
ExternalDataConstants.TAB_STR);
+                configuration.putIfAbsent(ExternalDataConstants.KEY_QUOTE, 
ExternalDataConstants.NULL_STR);
+                
configuration.putIfAbsent(ExternalDataConstants.KEY_QUOTE_ESCAPE, 
ExternalDataConstants.NULL_STR);
+            }
+        }
+    }
+
+    /**
+     * Prepares the configuration of the external dataset and its adapter by 
filling the information required by
+     * adapters and parsers.
+     *
+     * @param adapterName adapter name
+     * @param configuration external data configuration
+     */
+    public static void prepare(String adapterName, Map<String, String> 
configuration) {
+        if (!configuration.containsKey(ExternalDataConstants.KEY_READER)) {
+            configuration.put(ExternalDataConstants.KEY_READER, adapterName);
+        }
+        if (!configuration.containsKey(ExternalDataConstants.KEY_PARSER)
+                && 
configuration.containsKey(ExternalDataConstants.KEY_FORMAT)) {
+            configuration.put(ExternalDataConstants.KEY_PARSER, 
configuration.get(ExternalDataConstants.KEY_FORMAT));
+        }
+    }
 }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java
 
b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java
new file mode 100644
index 0000000..488be04
--- /dev/null
+++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.dataflow.common.data.parsers;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public class BooleanParserFactory implements IValueParserFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    public static final IValueParserFactory INSTANCE = new 
BooleanParserFactory();
+
+    private BooleanParserFactory() {
+    }
+
+    @Override
+    public IValueParser createValueParser() {
+        return BooleanParserFactory::parse;
+    }
+
+    public static void parse(char[] buffer, int start, int length, DataOutput 
out) throws HyracksDataException {
+        try {
+            if (length == 4 && (buffer[start] == 't' || buffer[start] == 'T')
+                    && (buffer[start + 1] == 'r' || buffer[start + 1] == 'R')
+                    && (buffer[start + 2] == 'u' || buffer[start + 2] == 'U')
+                    && (buffer[start + 3] == 'e' || buffer[start + 3] == 'E')) 
{
+                out.writeBoolean(true);
+                return;
+            } else if (length == 5 && (buffer[start] == 'f' || buffer[start] 
== 'F')
+                    && (buffer[start + 1] == 'a' || buffer[start + 1] == 'A')
+                    && (buffer[start + 2] == 'l' || buffer[start + 2] == 'L')
+                    && (buffer[start + 3] == 's' || buffer[start + 3] == 'S')
+                    && (buffer[start + 4] == 'e' || buffer[start + 4] == 'E')) 
{
+                out.writeBoolean(false);
+                return;
+            }
+        } catch (IOException e) {
+            throw HyracksDataException.create(e);
+        }
+
+        throw new HyracksDataException("Invalid input data");
+    }
+}
diff --git 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
index 9ddb4c2..2eb882a 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
@@ -76,12 +76,11 @@ public class DelimitedDataTupleParserFactory implements 
ITupleParserFactory {
                                 break;
                             }
                             // Eliminate double quotes in the field that we 
are going to parse
-                            if (cursor.isDoubleQuoteIncludedInThisField) {
-                                cursor.eliminateDoubleQuote(cursor.buffer, 
cursor.fStart, cursor.fEnd - cursor.fStart);
-                                cursor.fEnd -= cursor.doubleQuoteCount;
-                                cursor.isDoubleQuoteIncludedInThisField = 
false;
+                            if (cursor.fieldHasDoubleQuote()) {
+                                cursor.eliminateDoubleQuote();
                             }
-                            valueParsers[i].parse(cursor.buffer, 
cursor.fStart, cursor.fEnd - cursor.fStart, dos);
+                            valueParsers[i].parse(cursor.getBuffer(), 
cursor.getFieldStart(), cursor.getFieldLength(),
+                                    dos);
                             tb.addFieldEndOffset();
                         }
                         FrameUtils.appendToWriter(writer, appender, 
tb.getFieldEndOffsets(), tb.getByteArray(), 0,
diff --git 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 7e5ee2c..fd3e4c3 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -32,18 +32,18 @@ public class FieldCursorForDelimitedDataParser {
         EOF //end of stream reached
     }
 
-    public char[] buffer; //buffer to holds the input coming form the 
underlying input stream
-    public int fStart; //start position for field
-    public int fEnd; //end position for field
-    public int recordCount; //count of records
-    public int fieldCount; //count of fields in current record
-    public int doubleQuoteCount; //count of double quotes
-    public boolean isDoubleQuoteIncludedInThisField; //does current field 
include double quotes
+    private char[] buffer; //buffer to holds the input coming form the 
underlying input stream
+    private int fStart; //start position for field
+    private int fEnd; //end position for field
+    private int recordCount; //count of records
+    private int fieldCount; //count of fields in current record
+    private int doubleQuoteCount; //count of double quotes
+    private boolean isDoubleQuoteIncludedInThisField; //does current field 
include double quotes
 
     private static final int INITIAL_BUFFER_SIZE = 4096;//initial buffer size
     private static final int INCREMENT = 4096; //increment size
 
-    private Reader in; //the underlying buffer
+    private final Reader in; //the underlying buffer
 
     private int start; //start of valid buffer area
     private int end; //end of valid buffer area
@@ -55,8 +55,8 @@ public class FieldCursorForDelimitedDataParser {
     private int quoteCount; //count of single quotes
     private boolean startedQuote; //whether a quote has been started
 
-    private char quote; //the quote character
-    private char fieldDelimiter; //the delimiter
+    private final char quote; //the quote character
+    private final char fieldDelimiter; //the delimiter
 
     public FieldCursorForDelimitedDataParser(Reader in, char fieldDelimiter, 
char quote) {
         this.in = in;
@@ -70,9 +70,9 @@ public class FieldCursorForDelimitedDataParser {
         state = State.INIT;
         this.quote = quote;
         this.fieldDelimiter = fieldDelimiter;
-        lastDelimiterPosition = -99;
-        lastQuotePosition = -99;
-        lastDoubleQuotePosition = -99;
+        lastDelimiterPosition = -1;
+        lastQuotePosition = -1;
+        lastDoubleQuotePosition = -1;
         quoteCount = 0;
         doubleQuoteCount = 0;
         startedQuote = false;
@@ -81,9 +81,44 @@ public class FieldCursorForDelimitedDataParser {
         fieldCount = 0;
     }
 
-    public void nextRecord(char[] buffer, int recordLength) throws IOException 
{
+    public char[] getBuffer() {
+        return buffer;
+    }
+
+    public int getFieldStart() {
+        return fStart;
+    }
+
+    public int getFieldLength() {
+        return fEnd - fStart;
+    }
+
+    public boolean isFieldEmpty() {
+        return fStart == fEnd;
+    }
+
+    public boolean fieldHasDoubleQuote() {
+        return isDoubleQuoteIncludedInThisField;
+    }
+
+    public int getFieldCount() {
+        return fieldCount;
+    }
+
+    public int getRecordCount() {
+        return recordCount;
+    }
+
+    public void nextRecord(char[] buffer, int recordLength) {
         recordCount++;
         fieldCount = 0;
+        lastDelimiterPosition = -1;
+        lastQuotePosition = -1;
+        lastDoubleQuotePosition = -1;
+        quoteCount = 0;
+        doubleQuoteCount = 0;
+        startedQuote = false;
+        isDoubleQuoteIncludedInThisField = false;
         start = 0;
         end = recordLength;
         state = State.IN_RECORD;
@@ -187,7 +222,6 @@ public class FieldCursorForDelimitedDataParser {
     }
 
     public boolean nextField() throws IOException {
-        fieldCount++;
         switch (state) {
             case INIT:
             case EOR:
@@ -196,12 +230,12 @@ public class FieldCursorForDelimitedDataParser {
                 return false;
 
             case IN_RECORD:
-                boolean eof;
+                fieldCount++;
                 // reset quote related values
                 startedQuote = false;
                 isDoubleQuoteIncludedInThisField = false;
-                lastQuotePosition = -99;
-                lastDoubleQuotePosition = -99;
+                lastQuotePosition = -1;
+                lastDoubleQuotePosition = -1;
                 quoteCount = 0;
                 doubleQuoteCount = 0;
 
@@ -209,21 +243,26 @@ public class FieldCursorForDelimitedDataParser {
                 while (true) {
                     if (p >= end) {
                         int s = start;
-                        eof = !readMore();
+                        boolean eof = !readMore();
                         p -= (s - start);
-                        lastQuotePosition -= (s - start);
-                        lastDoubleQuotePosition -= (s - start);
-                        lastDelimiterPosition -= (s - start);
+                        lastQuotePosition -= (lastQuotePosition > -1) ? (s - 
start) : 0;
+                        lastDoubleQuotePosition -= (lastDoubleQuotePosition > 
-1) ? (s - start) : 0;
+                        lastDelimiterPosition -= (lastDelimiterPosition > -1) 
? (s - start) : 0;
                         if (eof) {
                             state = State.EOF;
-                            if (startedQuote && lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1
-                                    && quoteCount == doubleQuoteCount * 2 + 2) 
{
-                                // set the position of fStart to +1, fEnd to 
-1 to remove quote character
-                                fStart = start + 1;
-                                fEnd = p - 1;
-                            } else {
+                            if (!startedQuote) {
                                 fStart = start;
                                 fEnd = p;
+                            } else {
+                                if (lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1
+                                        && quoteCount == doubleQuoteCount * 2 
+ 2) {
+                                    // set the position of fStart to +1, fEnd 
to -1 to remove quote character
+                                    fStart = start + 1;
+                                    fEnd = p - 1;
+                                } else {
+                                    throw new IOException("At record: " + 
recordCount + ", field#: " + fieldCount
+                                            + " - missing a closing quote");
+                                }
                             }
                             return true;
                         }
@@ -232,12 +271,12 @@ public class FieldCursorForDelimitedDataParser {
                     if (ch == quote) {
                         // If this is first quote in the field, then it needs 
to be placed in the beginning.
                         if (!startedQuote) {
-                            if (lastDelimiterPosition == p - 1 || 
lastDelimiterPosition == -99) {
+                            if (p == start) {
                                 startedQuote = true;
                             } else {
                                 // In this case, we don't have a quote in the 
beginning of a field.
                                 throw new IOException("At record: " + 
recordCount + ", field#: " + fieldCount
-                                        + " - a quote enclosing a field needs 
to be placed in the beginning of that field.");
+                                        + " - a quote enclosing a field needs 
to be placed in the beginning of that field");
                             }
                         }
                         // Check double quotes - "". We check [start != p-2]
@@ -245,8 +284,8 @@ public class FieldCursorForDelimitedDataParser {
                         // since it looks like a double quote. However, it's 
not a double quote.
                         // (e.g. if field2 has no value:
                         //       field1,"",field3 ... )
-                        if (lastQuotePosition == p - 1 && 
lastDelimiterPosition != p - 2
-                                && lastDoubleQuotePosition != p - 1) {
+                        if (lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1
+                                && lastQuotePosition != start) {
                             isDoubleQuoteIncludedInThisField = true;
                             doubleQuoteCount++;
                             lastDoubleQuotePosition = p;
@@ -262,64 +301,46 @@ public class FieldCursorForDelimitedDataParser {
                             start = p + 1;
                             lastDelimiterPosition = p;
                             return true;
-                        } else if (startedQuote) {
-                            if (lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1) {
-                                // There is a quote right before the delimiter 
(e.g. ",)  and it is not two quote,
-                                // then the field contains a valid string.
-                                // We set the position of fStart to +1, fEnd 
to -1 to remove quote character
-                                fStart = start + 1;
-                                fEnd = p - 1;
-                                start = p + 1;
-                                lastDelimiterPosition = p;
-                                startedQuote = false;
-                                return true;
-                            } else if (lastQuotePosition < p - 1 && 
lastQuotePosition != lastDoubleQuotePosition
-                                    && quoteCount == doubleQuoteCount * 2 + 2) 
{
-                                // There is a quote before the delimiter, 
however it is not directly placed before the delimiter.
-                                // In this case, we throw an exception.
-                                // quoteCount == doubleQuoteCount * 2 + 2 : 
only true when we have two quotes except double-quotes.
-                                throw new IOException("At record: " + 
recordCount + ", field#: " + fieldCount
-                                        + " -  A quote enclosing a field needs 
to be followed by the delimiter.");
-                            }
                         }
-                        // If the control flow reaches here: we have a 
delimiter in this field and
-                        // there should be a quote in the beginning and the 
end of
-                        // this field. So, just continue reading next character
-                    } else if (ch == '\n') {
-                        if (!startedQuote) {
-                            fStart = start;
-                            fEnd = p;
-                            start = p + 1;
-                            state = State.EOR;
-                            lastDelimiterPosition = p;
-                            return true;
-                        } else if (startedQuote && lastQuotePosition == p - 1 
&& lastDoubleQuotePosition != p - 1
-                                && quoteCount == doubleQuoteCount * 2 + 2) {
-                            // set the position of fStart to +1, fEnd to -1 to 
remove quote character
+
+                        if (lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1
+                                && lastQuotePosition != start) {
+                            // There is a quote right before the delimiter 
(e.g. ",)  and it is not two quote,
+                            // then the field contains a valid string.
+                            // We set the position of fStart to +1, fEnd to -1 
to remove quote character
                             fStart = start + 1;
                             fEnd = p - 1;
-                            lastDelimiterPosition = p;
                             start = p + 1;
-                            state = State.EOR;
+                            lastDelimiterPosition = p;
                             startedQuote = false;
                             return true;
+                        } else if (lastQuotePosition < p - 1 && 
lastQuotePosition != lastDoubleQuotePosition
+                                && quoteCount == doubleQuoteCount * 2 + 2) {
+                            // There is a quote before the delimiter, however 
it is not directly placed before the delimiter.
+                            // In this case, we throw an exception.
+                            // quoteCount == doubleQuoteCount * 2 + 2 : only 
true when we have two quotes except double-quotes.
+                            throw new IOException("At record: " + recordCount 
+ ", field#: " + fieldCount
+                                    + " -  A quote enclosing a field needs to 
be followed by the delimiter.");
                         }
-                    } else if (ch == '\r') {
+                        // If the control flow reaches here: we have a 
delimiter in this field and
+                        // there should be a quote in the beginning and the 
end of
+                        // this field. So, just continue reading next character
+                    } else if (ch == '\n' || ch == '\r') {
                         if (!startedQuote) {
                             fStart = start;
                             fEnd = p;
                             start = p + 1;
-                            state = State.CR;
+                            state = ch == '\n' ? State.EOR : State.CR;
                             lastDelimiterPosition = p;
                             return true;
-                        } else if (startedQuote && lastQuotePosition == p - 1 
&& lastDoubleQuotePosition != p - 1
+                        } else if (lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1
                                 && quoteCount == doubleQuoteCount * 2 + 2) {
                             // set the position of fStart to +1, fEnd to -1 to 
remove quote character
                             fStart = start + 1;
                             fEnd = p - 1;
                             lastDelimiterPosition = p;
                             start = p + 1;
-                            state = State.CR;
+                            state = ch == '\n' ? State.EOR : State.CR;
                             startedQuote = false;
                             return true;
                         }
@@ -330,7 +351,10 @@ public class FieldCursorForDelimitedDataParser {
         throw new IllegalStateException();
     }
 
-    protected boolean readMore() throws IOException {
+    private boolean readMore() throws IOException {
+        if (in == null) {
+            return false;
+        }
         if (start > 0) {
             System.arraycopy(buffer, start, buffer, 0, end - start);
         }
@@ -350,10 +374,11 @@ public class FieldCursorForDelimitedDataParser {
     }
 
     // Eliminate escaped double quotes("") in a field
-    public void eliminateDoubleQuote(char[] buffer, int start, int length) {
-        int lastDoubleQuotePosition = -99;
-        int writepos = start;
-        int readpos = start;
+    public void eliminateDoubleQuote() {
+        int lastDoubleQuotePosition = -1;
+        int writepos = fStart;
+        int readpos = fStart;
+        int length = fEnd - fStart;
         // Find positions where double quotes appear
         for (int i = 0; i < length; i++) {
             // Skip double quotes
@@ -369,5 +394,7 @@ public class FieldCursorForDelimitedDataParser {
                 readpos++;
             }
         }
+        fEnd -= doubleQuoteCount;
+        isDoubleQuoteIncludedInThisField = false;
     }
 }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java
 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java
index e663179..8edcafc 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java
@@ -51,9 +51,8 @@ public class CursorTest {
             while (cursor.nextRecord()) {
                 int fieldNumber = 0;
                 while (cursor.nextField()) {
-                    if (cursor.isDoubleQuoteIncludedInThisField) {
-                        cursor.eliminateDoubleQuote(cursor.buffer, 
cursor.fStart, cursor.fEnd - cursor.fStart);
-                        cursor.fEnd -= cursor.doubleQuoteCount;
+                    if (cursor.fieldHasDoubleQuote()) {
+                        cursor.eliminateDoubleQuote();
                     }
                     fieldNumber++;
                 }

[asterixdb] 05/20: [ASTERIXDB-2713][EXT] Add CSV & TSV support for external dataset

Reply via email to