Ian Maxon has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/2590
Change subject: [ASTERIXDB-2363][EXT] Fix quoted delimiter handling ...................................................................... [ASTERIXDB-2363][EXT] Fix quoted delimiter handling Change-Id: I1fd4435702041a2c857cdf2c3fe95f999aa6720a --- A asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml M hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java 7 files changed, 110 insertions(+), 2 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/90/2590/1 diff --git a/asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv b/asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv new file mode 100644 index 0000000..3341f6f --- /dev/null +++ b/asterixdb/asterix-app/data/csv/csv_escaped_delimiter.csv @@ -0,0 +1 @@ +1,",",", 1a","","" diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp new file mode 100644 index 0000000..2fc9b63 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.1.ddl.sqlpp @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/** + * + * CSV file loading test + * Expected result: success + * + */ + +drop dataverse temp if exists; +create dataverse temp; + +use temp; + + +create type temp.test as + closed { + id : bigint, + string : string, + string2: string, + string3: string +}; + +create dataset testds(test) primary key id; + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp new file mode 100644 index 0000000..2a2093b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.2.update.sqlpp @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/** + * + * CSV file loading test + * Expected result: success + * + */ + +use temp; + + +load dataset testds using localfs ( +(`path`=`asterix_nc1://data/csv/csv_escaped_delimiter.csv`), +(`format`=`delimited-text`),(`delimiter`=`,`)); + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp new file mode 100644 index 0000000..938bff5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_escaped_delimiter/csv_escaped_delimiter.3.query.sqlpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/** + * + * CSV file loading test + * Expected result: success + * + */ + +use temp; + +select * from testds +order by id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm new file mode 100644 index 0000000..eac588c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_escaped_delimiter/csv_escaped_delimiter.1.adm @@ -0,0 +1 @@ +{ "testds": { "id": 1, "string": ",", "string2": ", 1a", "string3": "" } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 1b49358..b4c22f2 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -8361,6 +8361,11 @@ </compilation-unit> </test-case> <test-case FilePath="load"> + <compilation-unit name="csv_escaped_delimiter"> + <output-dir compare="Text">csv_escaped_delimiter</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="load"> <compilation-unit name="issue14_query"> <output-dir compare="Text">issue14_query</output-dir> <expected-error>Unspecified parameter: format</expected-error> diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java index 7e5ee2c..d48fd3b 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java @@ -263,7 +263,7 @@ lastDelimiterPosition = p; return true; } else if (startedQuote) { - if (lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1) { + if (lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1 && quoteCount % 2 == 0) { // There is a quote right before the delimiter (e.g. ",) and it is not two quote, // then the field contains a valid string. // We set the position of fStart to +1, fEnd to -1 to remove quote character @@ -271,7 +271,6 @@ fEnd = p - 1; start = p + 1; lastDelimiterPosition = p; - startedQuote = false; return true; } else if (lastQuotePosition < p - 1 && lastQuotePosition != lastDoubleQuotePosition && quoteCount == doubleQuoteCount * 2 + 2) { -- To view, visit https://asterix-gerrit.ics.uci.edu/2590 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1fd4435702041a2c857cdf2c3fe95f999aa6720a Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: release-0.9.4-pre-rc Gerrit-Owner: Ian Maxon <ima...@apache.org>