Ian Maxon has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/2590

Change subject: [ASTERIXDB-2363][EXT] Fix quoted delimiter handling
......................................................................

[ASTERIXDB-2363][EXT] Fix quoted delimiter handling

Change-Id: I1fd4435702041a2c857cdf2c3fe95f999aa6720a
---
A asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
M 
hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
7 files changed, 110 insertions(+), 2 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/90/2590/1

diff --git a/asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv 
b/asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv
new file mode 100644
index 0000000..3341f6f
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv
@@ -0,0 +1 @@
+1,",",",      1a","",""
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp
new file mode 100644
index 0000000..2fc9b63
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * CSV file loading test: quoted fields that contain the delimiter character (DDL setup)
+ * Expected result: success
+ *
+ */
+
+drop  dataverse temp if exists;
+create  dataverse temp;
+
+use temp;
+
+
+create type temp.test as
+ closed {
+  id : bigint,
+  string : string,
+  string2: string,
+  string3: string
+};
+
+create  dataset testds(test) primary key id;
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp
new file mode 100644
index 0000000..2a2093b
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * CSV file loading test: quoted fields that contain the delimiter character (data load)
+ * Expected result: success
+ *
+ */
+
+use temp;
+
+
+load  dataset testds using localfs (
+(`path`=`asterix_nc1://data/csv/csv_escaped_delimiter.csv`),
+(`format`=`delimited-text`),(`delimiter`=`,`));
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp
new file mode 100644
index 0000000..938bff5
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * CSV file loading test: quoted fields that contain the delimiter character (result query)
+ * Expected result: success
+ *
+ */
+
+use temp;
+
+select * from testds
+order by id;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm
new file mode 100644
index 0000000..eac588c
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm
@@ -0,0 +1 @@
+{ "testds": { "id": 1, "string": ",", "string2": ",      1a", "string3": "" } }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 1b49358..b4c22f2 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -8361,6 +8361,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="load">
+      <compilation-unit name="csv_escaped_delimiter">
+        <output-dir compare="Text">csv_escaped_delimiter</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="load">
       <compilation-unit name="issue14_query">
         <output-dir compare="Text">issue14_query</output-dir>
         <expected-error>Unspecified parameter: format</expected-error>
diff --git 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 7e5ee2c..d48fd3b 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -263,7 +263,7 @@
                             lastDelimiterPosition = p;
                             return true;
                         } else if (startedQuote) {
-                            if (lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1) {
+                            if (lastQuotePosition == p - 1 && 
lastDoubleQuotePosition != p - 1 && quoteCount % 2 == 0) {
                                 // There is a quote right before the delimiter 
(e.g. ",)  and it is not two quote,
                                 // then the field contains a valid string.
                                 // We set the position of fStart to +1, fEnd 
to -1 to remove quote character
@@ -271,7 +271,6 @@
                                 fEnd = p - 1;
                                 start = p + 1;
                                 lastDelimiterPosition = p;
-                                startedQuote = false;
                                 return true;
                             } else if (lastQuotePosition < p - 1 && 
lastQuotePosition != lastDoubleQuotePosition
                                     && quoteCount == doubleQuoteCount * 2 + 2) 
{

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/2590
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I1fd4435702041a2c857cdf2c3fe95f999aa6720a
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: release-0.9.4-pre-rc
Gerrit-Owner: Ian Maxon <ima...@apache.org>

Reply via email to