This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 5e9538b09dd13f79fb6e49a180c0f63a18b8d76b Author: Ali Alsuliman <[email protected]> AuthorDate: Thu Apr 23 21:08:44 2020 -0700 [ASTERIXDB-2713][EXT] CSV & TSV support for external dataset p4 - user model changes: no - storage format changes: no - interface changes: yes - IValueParser Details: - added parameter "null" to allow user to specify what string represents a null value. Only nullable fields whose values match this string will produce NULL. Otherwise, no NULLs will be produced. - empty fields: - for string fields, produce empty string "". - for non-string fields, issue a warning and ignore the record. - changed IValueParser to return boolean to allow some implementations to return true or false instead of throwing an exception. - added parameter "redact-warnings" to allow user to specify if parser warnings should exclude information like file name. - changed the Integer and Long parsers to consider spaces properly and also to handle overflow and underflow. - changed the boolean parser to align with other parsers that ignore leading and trailing spaces. - added test cases. 
Change-Id: Ib6aed0095a472510b69cc29a3e444e7de5a2c1ae Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/5963 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> Tested-by: Jenkins <[email protected]> --- .../data/csv/header/h_invalid_values.csv | 13 ++ .../csv-parser-001/csv-parser-001.1.ddl.sqlpp | 2 +- .../s3/csv-warnings/query-dataset.002.ddl.sqlpp | 2 + ...t.002.ddl.sqlpp => query-dataset.007.ddl.sqlpp} | 8 +- .../s3/csv-warnings/query-dataset.008.query.sqlpp} | 15 +- .../csv-warnings/query-dataset.009.s3bucket.sqlpp} | 12 +- ...t.002.ddl.sqlpp => query-dataset.010.ddl.sqlpp} | 13 +- .../s3/csv-warnings/query-dataset.011.query.sqlpp} | 15 +- ...t.002.ddl.sqlpp => query-dataset.012.ddl.sqlpp} | 12 +- .../s3/csv-warnings/query-dataset.013.query.sqlpp} | 15 +- .../aws/s3/csv/000/query-dataset.000.ddl.sqlpp | 3 +- .../aws/s3/tsv/000/query-dataset.000.ddl.sqlpp | 3 +- .../load/csv_01/csv_01.2.update.sqlpp | 2 +- .../load/csv_02/csv_02.2.update.sqlpp | 2 +- .../load/csv_03/csv_03.2.update.sqlpp | 2 +- .../load/csv_08_header_cr/csv_08.2.update.sqlpp | 2 +- .../load/csv_08_header_crlf/csv_08.2.update.sqlpp | 2 +- .../load/csv_08_header_lf/csv_08.2.update.sqlpp | 2 +- .../queries_sqlpp/tpcds/q19/q19.2.update.sqlpp | 12 +- .../queries_sqlpp/tpcds/q25/q25.2.update.sqlpp | 12 +- .../queries_sqlpp/tpcds/q29/q29.2.update.sqlpp | 12 +- .../queries_sqlpp/tpcds/q30/q30.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q34/q34.2.update.sqlpp | 10 +- .../queries_sqlpp/tpcds/q43/q43.2.update.sqlpp | 6 +- .../queries_sqlpp/tpcds/q46/q46.2.update.sqlpp | 12 +- .../queries_sqlpp/tpcds/q50/q50.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q57/q57.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q59/q59.2.update.sqlpp | 6 +- .../queries_sqlpp/tpcds/q62/q62.2.update.sqlpp | 10 +- .../queries_sqlpp/tpcds/q63/q63.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q73/q73.2.update.sqlpp | 10 +- 
.../queries_sqlpp/tpcds/q79/q79.2.update.sqlpp | 10 +- .../queries_sqlpp/tpcds/q81/q81.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q85/q85.2.update.sqlpp | 14 +- .../queries_sqlpp/tpcds/q88/q88.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q89/q89.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q90/q90.2.update.sqlpp | 8 +- .../queries_sqlpp/tpcds/q91/q91.2.update.sqlpp | 14 +- .../queries_sqlpp/tpcds/q96/q96.2.update.sqlpp | 8 +- .../query-ASTERIXDB-1591.2.update.sqlpp | 14 +- .../query-ASTERIXDB-1596.2.update.sqlpp | 6 +- .../aws/s3/csv-warnings/external_dataset.003.adm | 1 + .../aws/s3/csv-warnings/external_dataset.004.adm | 3 + .../aws/s3/csv-warnings/external_dataset.005.adm | 3 + .../testsuite_external_dataset_one_partition.xml | 27 ++- .../input/record/RecordWithMetadataAndPK.java | 4 +- .../stream/EmptyLineSeparatedRecordReader.java | 2 +- .../record/reader/stream/LineRecordReader.java | 2 +- .../reader/stream/QuotedLineRecordReader.java | 4 +- .../reader/stream/SemiStructuredRecordReader.java | 2 +- .../record/reader/stream/StreamRecordReader.java | 14 +- .../external/parser/DelimitedDataParser.java | 55 ++++-- .../parser/factory/DelimitedDataParserFactory.java | 3 +- .../external/util/ExternalDataConstants.java | 5 + .../asterix/external/util/ExternalDataUtils.java | 17 +- .../om/base/temporal/ADateParserFactory.java | 9 +- .../om/base/temporal/ADateTimeParserFactory.java | 3 +- .../om/base/temporal/ADurationParserFactory.java | 3 +- .../om/base/temporal/ATimeParserFactory.java | 7 +- .../ABinaryHexStringConstructorDescriptor.java | 5 +- .../src/main/resources/errormsg/en.properties | 2 +- .../common/data/parsers/BooleanParserFactory.java | 51 +++-- .../data/parsers/ByteArrayBase64ParserFactory.java | 3 +- .../data/parsers/ByteArrayHexParserFactory.java | 3 +- .../common/data/parsers/DoubleParserFactory.java | 5 +- .../common/data/parsers/FloatParserFactory.java | 5 +- .../dataflow/common/data/parsers/IValueParser.java | 2 +- 
.../common/data/parsers/IntegerParserFactory.java | 122 +++++------- .../common/data/parsers/LongParserFactory.java | 123 +++++------- .../data/parsers/UTF8StringParserFactory.java | 3 +- .../parsers/ByteArrayBase64ParserFactoryTest.java | 3 +- .../parsers/ByteArrayHexParserFactoryTest.java | 3 +- .../common/data/parsers/ParserFactoryTest.java | 218 +++++++++++++++++++++ .../std/file/DelimitedDataTupleParserFactory.java | 6 +- .../examples/text/WordTupleParserFactory.java | 4 +- 75 files changed, 658 insertions(+), 409 deletions(-) diff --git a/asterixdb/asterix-app/data/csv/header/h_invalid_values.csv b/asterixdb/asterix-app/data/csv/header/h_invalid_values.csv new file mode 100644 index 0000000..1ec6140 --- /dev/null +++ b/asterixdb/asterix-app/data/csv/header/h_invalid_values.csv @@ -0,0 +1,13 @@ +bigint_t,nullable_bigint_t,double_t,nullable_double_t,str_t,nullable_str_t,boolean_t,nullable_boolean_t +163a,12,33.4,16.1,"text","text",true,false +1.22,12,33.4,16.1,"text","text",true,false +999999999999999999999999999999999,12,33.4,16.1,"text","text",true,false +1234,12,non-double,16.1,"text","text",true,false +1234 ,12, 15.1,16.1,"fine","fine",TRUE,false +1234,12,15.1,16.1,"text","text",non-boolean,false +1234,12,15.1,16.1,"fine","fine", true,false +1234,12,15.1,16.1,"text", "quote-not-at-beginning",true,false +1234,\N,15.1,\N,\N,\N,true,\N +\N,12,15.1,16.1,"text","text",true,false +1234,12,\N,16.1,"text","text",true,false +1234,12,15.1,16.1,"text","text",\N,false \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp index 113ace3..65816f6 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp @@ -30,6 +30,6 @@ CREATE TYPE t4 AS {f1: string, f2: string, f3: string, f4: string}; CREATE EXTERNAL DATASET ds1(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_09.csv"), ("format"="CSV"), ("header"="FALSE")); CREATE EXTERNAL DATASET ds2(t2) USING localfs(("path"="asterix_nc1://data/csv/sample_10.csv"), ("format"="Csv"), ("header"="False")); CREATE EXTERNAL DATASET ds3(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_11.csv"), ("format"="csv"), ("header"="FALSE")); -CREATE EXTERNAL DATASET ds4(t3) USING localfs(("path"="asterix_nc1://data/csv/sample_12.csv"), ("format"="csv"), ("header"="True")); +CREATE EXTERNAL DATASET ds4(t3) USING localfs(("path"="asterix_nc1://data/csv/sample_12.csv"), ("format"="csv"), ("header"="True"), ("null"="")); CREATE EXTERNAL DATASET ds5(t4) USING localfs(("path"="asterix_nc1://data/csv/sample_13.csv"), ("format"="csv"), ("header"="True")); CREATE EXTERNAL DATASET ds6(t4) USING localfs(("path"="asterix_nc1://data/csv/empty_lines.csv"), ("format"="csv"), ("header"="false")); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp index 191ddff..0eff4c8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp @@ -22,7 +22,9 @@ CREATE DATAVERSE test; USE test; DROP TYPE t1 IF EXISTS; +DROP TYPE t2 IF EXISTS; CREATE TYPE t1 AS {f1: int, f2: int, f3: int, f4: string}; +CREATE TYPE t2 AS {f1: bigint, f2: bigint?, f3: double, f4: double?, f5: 
string, f6: string?, f7: boolean, f8: boolean?}; DROP DATASET ds1 IF EXISTS; CREATE EXTERNAL DATASET ds1(t1) USING S3 ( diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.007.ddl.sqlpp similarity index 87% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.007.ddl.sqlpp index 191ddff..cbfe44d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.007.ddl.sqlpp @@ -17,13 +17,8 @@ * under the License. 
*/ -DROP DATAVERSE test IF EXISTS; -CREATE DATAVERSE test; USE test; -DROP TYPE t1 IF EXISTS; -CREATE TYPE t1 AS {f1: int, f2: int, f3: int, f4: string}; - DROP DATASET ds1 IF EXISTS; CREATE EXTERNAL DATASET ds1(t1) USING S3 ( ("accessKey"="dummyAccessKey"), @@ -33,5 +28,6 @@ CREATE EXTERNAL DATASET ds1(t1) USING S3 ( ("container"="playground"), ("definition"="data_dir"), ("format"="CSV"), -("header"="false") +("header"="false"), +("redact-warnings"="true") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.008.query.sqlpp similarity index 79% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.008.query.sqlpp index 988ebe3..26ccfa7 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.008.query.sqlpp @@ -16,15 +16,8 @@ * specific language governing permissions and limitations * under the License. 
*/ -/** - * - * CSV file loading test - * Expected result: success - * - */ - -use temp; - - -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`)); +// requesttype=application/json +// param max-warnings:json=100 +USE test; +FROM ds1 v SELECT VALUE v ORDER BY v.f1 ASC; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.009.s3bucket.sqlpp similarity index 79% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.009.s3bucket.sqlpp index 988ebe3..f9da983 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.009.s3bucket.sqlpp @@ -16,15 +16,5 @@ * specific language governing permissions and limitations * under the License. 
*/ -/** - * - * CSV file loading test - * Expected result: success - * - */ - -use temp; - - -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`)); +playground data_dir data/csv/header/h_invalid_values.csv \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.010.ddl.sqlpp similarity index 81% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.010.ddl.sqlpp index 191ddff..bd87ccb 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.010.ddl.sqlpp @@ -17,15 +17,10 @@ * under the License. 
*/ -DROP DATAVERSE test IF EXISTS; -CREATE DATAVERSE test; USE test; -DROP TYPE t1 IF EXISTS; -CREATE TYPE t1 AS {f1: int, f2: int, f3: int, f4: string}; - -DROP DATASET ds1 IF EXISTS; -CREATE EXTERNAL DATASET ds1(t1) USING S3 ( +DROP DATASET ds2 IF EXISTS; +CREATE EXTERNAL DATASET ds2(t2) USING S3 ( ("accessKey"="dummyAccessKey"), ("secretKey"="dummySecretKey"), ("region"="us-west-2"), @@ -33,5 +28,7 @@ CREATE EXTERNAL DATASET ds1(t1) USING S3 ( ("container"="playground"), ("definition"="data_dir"), ("format"="CSV"), -("header"="false") +("header"="true"), +("redact-warnings"="true"), +("null"="\\N") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.011.query.sqlpp similarity index 79% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.011.query.sqlpp index 988ebe3..e6b24f3 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.011.query.sqlpp @@ -16,15 +16,8 @@ * specific language governing permissions and limitations * under the License. 
*/ -/** - * - * CSV file loading test - * Expected result: success - * - */ - -use temp; - - -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`)); +// requesttype=application/json +// param max-warnings:json=100 +USE test; +FROM ds2 v SELECT VALUE v ORDER BY v.f1 ASC; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.012.ddl.sqlpp similarity index 81% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.012.ddl.sqlpp index 191ddff..e394f2d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.012.ddl.sqlpp @@ -17,15 +17,10 @@ * under the License. 
*/ -DROP DATAVERSE test IF EXISTS; -CREATE DATAVERSE test; USE test; -DROP TYPE t1 IF EXISTS; -CREATE TYPE t1 AS {f1: int, f2: int, f3: int, f4: string}; - -DROP DATASET ds1 IF EXISTS; -CREATE EXTERNAL DATASET ds1(t1) USING S3 ( +DROP DATASET ds2 IF EXISTS; +CREATE EXTERNAL DATASET ds2(t2) USING S3 ( ("accessKey"="dummyAccessKey"), ("secretKey"="dummySecretKey"), ("region"="us-west-2"), @@ -33,5 +28,6 @@ CREATE EXTERNAL DATASET ds1(t1) USING S3 ( ("container"="playground"), ("definition"="data_dir"), ("format"="CSV"), -("header"="false") +("header"="true"), +("null"="\\N") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.013.query.sqlpp similarity index 79% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.013.query.sqlpp index 988ebe3..e6b24f3 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.013.query.sqlpp @@ -16,15 +16,8 @@ * specific language governing permissions and limitations * under the License. 
*/ -/** - * - * CSV file loading test - * Expected result: success - * - */ - -use temp; - - -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`)); +// requesttype=application/json +// param max-warnings:json=100 +USE test; +FROM ds2 v SELECT VALUE v ORDER BY v.f1 ASC; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp index 6184b19..15ba6a8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp @@ -33,5 +33,6 @@ CREATE EXTERNAL DATASET test(test) USING S3 ( ("container"="playground"), ("definition"="csv-data/reviews"), ("format"="Csv"), -("header"="false") +("header"="false"), +("null"="") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp index 194adf6..3c6ad92 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp @@ -33,5 +33,6 @@ CREATE EXTERNAL DATASET test(test) USING S3 ( ("container"="playground"), ("definition"="tsv-data/reviews"), ("format"="TSV"), -("header"="False") +("header"="False"), +("null"="") ); \ No newline at end of file diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp index 988ebe3..68faf3f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp @@ -26,5 +26,5 @@ use temp; -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`)); +load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp index 988ebe3..68faf3f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp @@ -26,5 +26,5 @@ use temp; -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`)); +load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp index 7eaaf1d..fdc1d1a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp @@ -26,5 +26,5 @@ use temp; -load dataset 
testds using localfs ((`path`=`asterix_nc1://data/csv/sample_02.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_02.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp index e44dfe9..d05c821 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp @@ -26,5 +26,5 @@ use temp; -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.cr`),(`format`=`delimited-text`),(`header`=`true`)); +load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.cr`),(`format`=`delimited-text`),(`header`=`true`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp index e4da86b..9be6773 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp @@ -26,5 +26,5 @@ use temp; -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.crlf`),(`format`=`delimited-text`),(`header`=`true`)); +load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.crlf`),(`format`=`delimited-text`),(`header`=`true`),(`null`=``)); diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp index e024bd3..102b2aa 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp @@ -26,5 +26,5 @@ use temp; -load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.lf`),(`format`=`delimited-text`),(`header`=`true`)); +load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.lf`),(`format`=`delimited-text`),(`header`=`true`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp index b59d644..fc0b44a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp @@ -20,14 +20,14 @@ use tpcds; -load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset 
date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp index 3bf9f8d..efd670e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp @@ -20,14 +20,14 @@ use tpcds; -load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_returns using localfs 
((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp index 3bf9f8d..efd670e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp @@ -20,14 +20,14 @@ use tpcds; -load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load 
dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp index 190a142..8d5b408 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp @@ -20,10 +20,10 @@ use tpcds; -load dataset customer_address using localfs 
((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp index 97ea2e8..98accdd 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp @@ -20,12 +20,12 @@ use tpcds; -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs 
((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp index bbf1838..654a1f1 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp @@ -20,8 +20,8 @@ use tpcds; -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales 
using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp index 6ab1d16..fc9d724 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp @@ -20,14 +20,14 @@ use tpcds; -load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs 
((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp index d50d706..b870b65 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp @@ -20,10 +20,10 @@ use tpcds; -load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs 
((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp index 290b1ae..25b4d12 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp @@ -19,11 +19,11 @@ use tpcds; -load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp index 4cadf19..80bfe7a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp @@ -20,8 +20,8 @@ use tpcds; -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp index 2aab7e4..b1a240c 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp @@ -20,12 +20,12 @@ use tpcds; -load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_sales using localfs 
((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset web_site using localfs ((`path`=`asterix_nc1://data/tpcds/web_site.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_site using localfs ((`path`=`asterix_nc1://data/tpcds/web_site.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset warehouse using localfs ((`path`=`asterix_nc1://data/tpcds/warehouse.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset warehouse using localfs ((`path`=`asterix_nc1://data/tpcds/warehouse.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset ship_mode using localfs ((`path`=`asterix_nc1://data/tpcds/ship_mode.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset ship_mode using localfs ((`path`=`asterix_nc1://data/tpcds/ship_mode.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp index c2cef59..a56835d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp @@ -19,10 +19,10 @@ use tpcds; -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load 
dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp index 97ea2e8..98accdd 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp @@ -20,12 +20,12 @@ use tpcds; -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs 
((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp index 97ea2e8..98accdd 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp @@ -20,12 +20,12 @@ use tpcds; -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs 
((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp index b7e0e29..9ce02ba 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp @@ -20,10 +20,10 @@ use tpcds; -load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset catalog_returns using localfs 
((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp index fdb2d4f..821de94 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp @@ -20,16 +20,16 @@ use tpcds; -load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset reason using localfs ((`path`=`asterix_nc1://data/tpcds/reason.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset reason using localfs ((`path`=`asterix_nc1://data/tpcds/reason.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer_demographics using localfs 
((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp index a673f5b..87cb458 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp @@ -20,10 +20,10 @@ use tpcds; -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset time_dim using localfs 
((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp index c2cef59..a56835d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp @@ -19,10 +19,10 @@ use tpcds; -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs 
((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp index 319491a..512eaf2 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp @@ -20,10 +20,10 @@ use tpcds; -load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset web_sales using localfs 
((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp index 4599485..8d50a4a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp @@ -20,16 +20,16 @@ use tpcds; -load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load 
dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp index a673f5b..87cb458 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp @@ -20,10 +20,10 @@ use tpcds; -load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store_sales using localfs 
((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); -load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`)); +load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp index 9401cee..194b60e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp @@ -20,22 +20,22 @@ use tpcds; load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), 
(`delimiter`=`|`),(`null`=``)); load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp index 01cca8d..9c8aa41 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp @@ -20,10 +20,10 @@ use tpcds; load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`), -(`format`=`delimited-text`), (`delimiter`=`|`)); +(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``)); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.003.adm new file mode 100644 index 0000000..7d3c940 --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.003.adm @@ -0,0 +1 @@ +{ "f1": 1, "f2": 2, "f3": 3, "f4": "str" } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.004.adm new file mode 100644 index 0000000..334381b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.004.adm @@ -0,0 +1,3 @@ +{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false } +{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false } +{ "f1": 1234, "f2": null, "f3": 15.1, "f4": null, "f5": "\\N", "f6": null, "f7": true, "f8": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.005.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.005.adm new file mode 100644 index 0000000..334381b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.005.adm @@ -0,0 +1,3 @@ +{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false } +{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false } +{ "f1": 1234, "f2": null, "f3": 15.1, "f4": null, "f5": "\\N", "f6": null, "f7": true, "f8": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml index 876a8ef..e194c86 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml @@ -42,14 +42,35 @@ <test-case FilePath="external-dataset" check-warnings="true"> <compilation-unit name="aws/s3/csv-warnings"> <output-dir compare="Text">aws/s3/csv-warnings</output-dir> - <expected-warn>Parsing error in data_dir/no_h_missing_fields.csv at record 2 field 3: some fields are missing</expected-warn> - <expected-warn>Parsing error in data_dir/no_h_no_closing_q.csv at record 0 field 0: malformed input record ended inside quote</expected-warn> + <expected-warn>Parsing error at data_dir/no_h_missing_fields.csv record 2 field 3: some fields are missing</expected-warn> + <expected-warn>Parsing error at data_dir/no_h_no_closing_q.csv record 0 field 0: malformed input record ended inside quote</expected-warn> + <expected-warn>Parsing error at record 0 field 0: malformed input record ended inside quote</expected-warn> + + <expected-warn>Parsing error at record 4 field 3: invalid value</expected-warn> + <expected-warn>Parsing error at record 1 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at record 10 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at record 2 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at record 3 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at record 6 field 7: invalid value</expected-warn> + <expected-warn>Parsing error at record 12 field 7: invalid value</expected-warn> + <expected-warn>Parsing error at record 11 field 3: invalid value</expected-warn> + <expected-warn>Parsing error at record 8 field 6: a quote should be in the beginning</expected-warn> + + <expected-warn>Parsing error at data_dir/h_invalid_values.csv 
record 4 field 3: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 1 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 10 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 2 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 3 field 1: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 6 field 7: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 12 field 7: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 11 field 3: invalid value</expected-warn> + <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 8 field 6: a quote should be in the beginning</expected-warn> </compilation-unit> </test-case> <test-case FilePath="external-dataset" check-warnings="true"> <compilation-unit name="aws/s3/tsv-warnings"> <output-dir compare="Text">aws/s3/tsv-warnings</output-dir> - <expected-warn>Parsing error in data_dir/no_h_missing_fields.tsv at record 2 field 3: some fields are missing</expected-warn> + <expected-warn>Parsing error at data_dir/no_h_missing_fields.tsv record 2 field 3: some fields are missing</expected-warn> </compilation-unit> </test-case> </test-group> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java index 959e34d..de889a2 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java @@ -155,7 
+155,9 @@ public class RecordWithMetadataAndPK<T> extends RecordWithPK<T> { fieldValueBufferOutputs[index].writeByte(ATypeTag.SERIALIZED_NULL_TYPE_TAG); } else { fieldValueBufferOutputs[index].writeByte(fieldTypeTags[index]); - valueParsers[index].parse(src, offset, length, fieldValueBufferOutputs[index]); + if (!valueParsers[index].parse(src, offset, length, fieldValueBufferOutputs[index])) { + throw new RuntimeDataException(ErrorCode.FAILED_TO_PARSE_METADATA); + } } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java index 24a68a7..b697b05 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java @@ -137,7 +137,7 @@ public class EmptyLineSeparatedRecordReader extends StreamRecordReader { @Override public void configure(IHyracksTaskContext ctx, AsterixInputStream inputStream, Map<String, String> config) { - super.configure(inputStream); + super.configure(inputStream, config); this.config = config; } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java index 0d16e0c..a3f560d 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java @@ -45,7 +45,7 @@ public class LineRecordReader extends 
StreamRecordReader { @Override public void configure(IHyracksTaskContext ctx, AsterixInputStream inputStream, Map<String, String> config) throws HyracksDataException { - super.configure(inputStream); + super.configure(inputStream, config); this.hasHeader = ExternalDataUtils.hasHeader(config); this.newSource = true; inputStream.setNotificationHandler(this); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java index 0ed1238..c6e78b0 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java @@ -60,7 +60,7 @@ public class QuotedLineRecordReader extends LineRecordReader { @Override public void notifyNewSource() { if (!record.isEmptyRecord() && warnings.shouldWarn()) { - ParseUtil.warn(warnings, reader.getStreamName(), recordNumber, 0, REC_ENDED_IN_Q); + ParseUtil.warn(warnings, getDataSourceName().get(), recordNumber, 0, REC_ENDED_IN_Q); } // restart for a new record from a new source resetForNewSource(); @@ -106,7 +106,7 @@ public class QuotedLineRecordReader extends LineRecordReader { if (readLength <= 0 || inQuote) { // haven't read anything previously OR have read and in the middle and hit the end if (inQuote && warnings.shouldWarn()) { - ParseUtil.warn(warnings, reader.getStreamName(), recordNumber, 0, REC_ENDED_IN_Q); + ParseUtil.warn(warnings, getDataSourceName().get(), recordNumber, 0, REC_ENDED_IN_Q); } close(); return false; diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java index 5ab5730..38eec98 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java @@ -48,7 +48,7 @@ public class SemiStructuredRecordReader extends StreamRecordReader { @Override public void configure(IHyracksTaskContext ctx, AsterixInputStream stream, Map<String, String> config) throws HyracksDataException { - super.configure(stream); + super.configure(stream, config); String recStartString = config.get(ExternalDataConstants.KEY_RECORD_START); String recEndString = config.get(ExternalDataConstants.KEY_RECORD_END); // set record opening char diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java index a70f1fe..6139f82 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java @@ -18,6 +18,9 @@ */ package org.apache.asterix.external.input.record.reader.stream; +import static org.apache.asterix.external.util.ExternalDataConstants.EMPTY_STRING; +import static org.apache.asterix.external.util.ExternalDataConstants.KEY_REDACT_WARNINGS; + import java.io.IOException; import java.util.List; import java.util.Map; @@ -31,6 +34,7 @@ import org.apache.asterix.external.dataflow.AbstractFeedDataFlowController; import org.apache.asterix.external.input.record.CharArrayRecord; import 
org.apache.asterix.external.input.stream.AsterixInputStreamReader; import org.apache.asterix.external.util.ExternalDataConstants; +import org.apache.asterix.external.util.ExternalDataUtils; import org.apache.asterix.external.util.FeedLogManager; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.HyracksDataException; @@ -43,11 +47,15 @@ public abstract class StreamRecordReader implements IRecordReader<char[]>, IStre protected int bufferPosn = 0; protected boolean done = false; protected FeedLogManager feedLogManager; + private Supplier<String> dataSourceName = EMPTY_STRING; - public void configure(AsterixInputStream inputStream) { + public void configure(AsterixInputStream inputStream, Map<String, String> config) { this.reader = new AsterixInputStreamReader(inputStream); record = new CharArrayRecord(); inputBuffer = new char[ExternalDataConstants.DEFAULT_BUFFER_SIZE]; + if (!ExternalDataUtils.isTrue(config, KEY_REDACT_WARNINGS)) { + this.dataSourceName = reader::getStreamName; + } } @Override @@ -106,8 +114,8 @@ public abstract class StreamRecordReader implements IRecordReader<char[]>, IStre } @Override - public Supplier<String> getDataSourceName() { - return reader::getStreamName; + public final Supplier<String> getDataSourceName() { + return dataSourceName; } public abstract List<String> getRecordReaderFormats(); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java index 86b95e1..8ac483e 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.external.parser; +import static 
org.apache.asterix.external.util.ExternalDataConstants.EMPTY_FIELD; +import static org.apache.asterix.external.util.ExternalDataConstants.INVALID_VAL; import static org.apache.asterix.external.util.ExternalDataConstants.MISSING_FIELDS; import java.io.DataOutput; @@ -37,8 +39,10 @@ import org.apache.asterix.external.api.IStreamDataParser; import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.util.ParseUtil; import org.apache.asterix.om.base.AMutableString; +import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.IAType; import org.apache.asterix.om.utils.NonTaggedFormatUtil; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.HyracksDataException; @@ -64,9 +68,11 @@ public class DelimitedDataParser extends AbstractDataParser implements IStreamDa private final byte[] fieldTypeTags; private final int[] fldIds; private final ArrayBackedValueStorage[] nameBuffers; + private final char[] nullChars; public DelimitedDataParser(IHyracksTaskContext ctx, IValueParserFactory[] valueParserFactories, char fieldDelimiter, - char quote, boolean hasHeader, ARecordType recordType, boolean isStreamParser) throws HyracksDataException { + char quote, boolean hasHeader, ARecordType recordType, boolean isStreamParser, String nullString) + throws HyracksDataException { this.dataSourceName = ExternalDataConstants.EMPTY_STRING; this.warnings = ctx.getWarningCollector(); this.fieldDelimiter = fieldDelimiter; @@ -110,6 +116,7 @@ public class DelimitedDataParser extends AbstractDataParser implements IStreamDa if (!isStreamParser) { cursor = new FieldCursorForDelimitedDataParser(null, this.fieldDelimiter, quote, warnings, dataSourceName); } + this.nullChars = nullString != null ? 
nullString.toCharArray() : null; } @Override @@ -153,24 +160,26 @@ public class DelimitedDataParser extends AbstractDataParser implements IStreamDa } fieldValueBuffer.reset(); - if (cursor.isFieldEmpty() && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING - && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.NULL) { - // if the field is empty and the type is optional, insert - // NULL. Note that string type can also process empty field as an - // empty string - if (!NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i])) { - throw new RuntimeDataException(ErrorCode.PARSER_DELIMITED_NONOPTIONAL_NULL, - cursor.getRecordCount(), cursor.getFieldCount()); - } + if (nullChars != null && NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i]) && fieldNull()) { fieldValueBufferOutput.writeByte(ATypeTag.SERIALIZED_NULL_TYPE_TAG); } else { + if (cursor.isFieldEmpty() && !canProcessEmptyField(recordType.getFieldTypes()[i])) { + ParseUtil.warn(warnings, dataSourceName.get(), cursor.getRecordCount(), cursor.getFieldCount(), + EMPTY_FIELD); + return false; + } fieldValueBufferOutput.writeByte(fieldTypeTags[i]); // Eliminate double quotes in the field that we are going to parse if (cursor.fieldHasDoubleQuote()) { cursor.eliminateDoubleQuote(); } - valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(), cursor.getFieldLength(), - fieldValueBufferOutput); + boolean success = valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(), + cursor.getFieldLength(), fieldValueBufferOutput); + if (!success) { + ParseUtil.warn(warnings, dataSourceName.get(), cursor.getRecordCount(), cursor.getFieldCount(), + INVALID_VAL); + return false; + } } if (fldIds[i] < 0) { recBuilder.addField(nameBuffers[i], fieldValueBuffer); @@ -223,4 +232,26 @@ public class DelimitedDataParser extends AbstractDataParser implements IStreamDa public void setDataSourceName(Supplier<String> dataSourceName) { this.dataSourceName = dataSourceName == null ? 
ExternalDataConstants.EMPTY_STRING : dataSourceName; } + + private static boolean canProcessEmptyField(IAType fieldType) { + IAType type = TypeComputeUtils.getActualType(fieldType); + // TODO(ali): investigate what it means for a field to have type NULL. there is no parser implemented for it + return type.getTypeTag() == ATypeTag.STRING || type.getTypeTag() == ATypeTag.NULL; + } + + private boolean fieldNull() { + int fieldLength = cursor.getFieldLength(); + int nullStringLength = nullChars.length; + if (fieldLength != nullStringLength) { + return false; + } + char[] fieldChars = cursor.getBuffer(); + int fieldStart = cursor.getFieldStart(); + for (int i = 0; i < fieldLength; i++) { + if (fieldChars[fieldStart + i] != nullChars[i]) { + return false; + } + } + return true; + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java index 46c5152..09f9697 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java @@ -50,8 +50,9 @@ public class DelimitedDataParserFactory extends AbstractRecordStreamParserFactor char delimiter = ExternalDataUtils.validateGetDelimiter(configuration); char quote = ExternalDataUtils.validateGetQuote(configuration, delimiter); boolean hasHeader = ExternalDataUtils.hasHeader(configuration); + String nullString = configuration.get(ExternalDataConstants.KEY_NULL_STR); return new DelimitedDataParser(ctx, valueParserFactories, delimiter, quote, hasHeader, recordType, - ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM)); + ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM), nullString); } 
@Override diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java index f08c6e6..7d1cdc0 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java @@ -111,6 +111,9 @@ public class ExternalDataConstants { public static final String KEY_HTTP_PROXY_PORT = "http-proxy-port"; public static final String KEY_HTTP_PROXY_USER = "http-proxy-user"; public static final String KEY_HTTP_PROXY_PASSWORD = "http-proxy-password"; + // a string representing the NULL value + public static final String KEY_NULL_STR = "null"; + public static final String KEY_REDACT_WARNINGS = "redact-warnings"; /** * Keys for adapter name @@ -265,6 +268,8 @@ public class ExternalDataConstants { public static final String ERROR_PARSE_RECORD = "Parser failed to parse record"; public static final String MISSING_FIELDS = "some fields are missing"; public static final String REC_ENDED_IN_Q = "malformed input record ended inside quote"; + public static final String EMPTY_FIELD = "empty value"; + public static final String INVALID_VAL = "invalid value"; public static class AwsS3Constants { public static final String REGION_FIELD_NAME = "region"; diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java index b7b441b..b1f11c9 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.external.util; +import 
static org.apache.asterix.external.util.ExternalDataConstants.KEY_REDACT_WARNINGS; + import java.util.EnumMap; import java.util.Map; @@ -189,11 +191,12 @@ public class ExternalDataUtils { } public static boolean hasHeader(Map<String, String> configuration) { - String value = configuration.get(ExternalDataConstants.KEY_HEADER); - if (value != null) { - return Boolean.valueOf(value); - } - return false; + return isTrue(configuration, ExternalDataConstants.KEY_HEADER); + } + + public static boolean isTrue(Map<String, String> configuration, String key) { + String value = configuration.get(key); + return value == null ? false : Boolean.valueOf(value); } public static IRecordReaderFactory<?> createExternalRecordReaderFactory(ILibraryManager libraryManager, @@ -394,6 +397,10 @@ public class ExternalDataUtils { char delimiter = validateGetDelimiter(configuration); validateGetQuote(configuration, delimiter); validateGetQuoteEscape(configuration); + String value = configuration.get(KEY_REDACT_WARNINGS); + if (value != null && !isBoolean(value)) { + throw new RuntimeDataException(ErrorCode.INVALID_REQ_PARAM_VAL, KEY_REDACT_WARNINGS, value); + } } private static boolean isHeaderRequiredFor(String format) { diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java index ed118f9..35691a1 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java @@ -43,9 +43,10 @@ public class ADateParserFactory implements IValueParserFactory { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { try { 
out.writeInt((int) (parseDatePart(buffer, start, length) / GregorianCalendarSystem.CHRONON_OF_DAY)); + return true; } catch (IOException ex) { throw HyracksDataException.create(ex); } @@ -56,11 +57,11 @@ public class ADateParserFactory implements IValueParserFactory { /** * Parse the given char sequence as a date string, and return the milliseconds represented by the date. * - * @param charAccessor + * @param dateString * accessor for the char sequence - * @param isDateOnly + * @param start * indicating whether it is a single date string, or it is the date part of a datetime string - * @param errorMessage + * @param length * @return * @throws Exception */ diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java index 2e32692..f9e53b9 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java @@ -43,7 +43,7 @@ public class ADateTimeParserFactory implements IValueParserFactory { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { long chrononTimeInMs = 0; short timeOffset = (short) ((buffer[start] == '-') ? 
1 : 0); @@ -64,6 +64,7 @@ public class ADateTimeParserFactory implements IValueParserFactory { try { out.writeLong(chrononTimeInMs); + return true; } catch (IOException ex) { throw HyracksDataException.create(ex); } diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java index 66fb4c3..ec387e2 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java @@ -52,11 +52,12 @@ public class ADurationParserFactory implements IValueParserFactory { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { parseDuration(buffer, start, length, aMutableDuration, ADurationParseOption.All); try { out.writeInt(aMutableDuration.getMonths()); out.writeLong(aMutableDuration.getMilliseconds()); + return true; } catch (IOException ex) { throw HyracksDataException.create(ex); } diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java index 40ddc55..039e026 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java @@ -43,9 +43,10 @@ public class ATimeParserFactory implements IValueParserFactory { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] buffer, int start, int length, 
DataOutput out) throws HyracksDataException { try { out.writeInt(parseTimePart(buffer, start, length)); + return true; } catch (IOException ex) { throw HyracksDataException.create(ex); } @@ -318,7 +319,7 @@ public class ATimeParserFactory implements IValueParserFactory { * * @param timeString * @param start - * @param length + * * @return * @throws HyracksDataException */ @@ -473,7 +474,7 @@ public class ATimeParserFactory implements IValueParserFactory { * * @param timeString * @param start - * @param length + * * @return * @throws HyracksDataException */ diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java index 3d65b0b..505b650 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java @@ -111,7 +111,10 @@ public class ABinaryHexStringConstructorDescriptor extends AbstractScalarFunctio utf8Ptr.set(inputArg.getByteArray(), startOffset + 1, len - 1); char[] buffer = utf8Ptr.toString().toCharArray(); out.write(ATypeTag.BINARY.serialize()); - byteArrayParser.parse(buffer, 0, buffer.length, out); + if (!byteArrayParser.parse(buffer, 0, buffer.length, out)) { + PointableHelper.setNull(result); + return; + } result.set(resultStorage); } else { throw new TypeMismatchException(sourceLoc, BuiltinFunctions.BINARY_HEX_CONSTRUCTOR, 0, tt, diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties index bed21f5..4e3cf4e 100644 --- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties 
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties @@ -140,7 +140,7 @@ 121 = A numeric type promotion error has occurred: %1$s 122 = Encountered an error while printing the plan 123 = Insufficient memory is provided for the join operators, please increase the join memory budget. -124 = Parsing error in %1$s at record %2$s field %3$s: %4$s +124 = Parsing error at %1$s record %2$s field %3$s: %4$s 10000 = The given rule collection %1$s is not an instance of the List class. 10001 = Cannot compose partition constraint %1$s with %2$s diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java index 488be04..141b99f 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java @@ -37,26 +37,43 @@ public class BooleanParserFactory implements IValueParserFactory { return BooleanParserFactory::parse; } - public static void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { - try { - if (length == 4 && (buffer[start] == 't' || buffer[start] == 'T') - && (buffer[start + 1] == 'r' || buffer[start + 1] == 'R') - && (buffer[start + 2] == 'u' || buffer[start + 2] == 'U') - && (buffer[start + 3] == 'e' || buffer[start + 3] == 'E')) { - out.writeBoolean(true); - return; - } else if (length == 5 && (buffer[start] == 'f' || buffer[start] == 'F') - && (buffer[start + 1] == 'a' || buffer[start + 1] == 'A') - && (buffer[start + 2] == 'l' || buffer[start + 2] == 'L') - && (buffer[start + 3] == 's' || buffer[start + 3] == 'S') - && (buffer[start + 4] == 'e' || 
buffer[start + 4] == 'E')) { - out.writeBoolean(false); - return; + public static boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + char ch; + int i = start; + int end = start + length; + while (i < end && ((ch = buffer[i]) == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f')) { + i++; + } + int remainingLength = end - i; + boolean gotBoolean = false; + boolean booleanValue = false; + if (remainingLength >= 4 && ((ch = buffer[i]) == 't' || ch == 'T') && ((ch = buffer[i + 1]) == 'r' || ch == 'R') + && ((ch = buffer[i + 2]) == 'u' || ch == 'U') && ((ch = buffer[i + 3]) == 'e' || ch == 'E')) { + gotBoolean = true; + booleanValue = true; + i = i + 4; + } else if (remainingLength >= 5 && ((ch = buffer[i]) == 'f' || ch == 'F') + && ((ch = buffer[i + 1]) == 'a' || ch == 'A') && ((ch = buffer[i + 2]) == 'l' || ch == 'L') + && ((ch = buffer[i + 3]) == 's' || ch == 'S') && ((ch = buffer[i + 4]) == 'e' || ch == 'E')) { + gotBoolean = true; + booleanValue = false; + i = i + 5; + } + + for (; i < end; ++i) { + ch = buffer[i]; + if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' && ch != '\f') { + return false; } + } + if (!gotBoolean) { + return false; + } + try { + out.writeBoolean(booleanValue); + return true; } catch (IOException e) { throw HyracksDataException.create(e); } - - throw new HyracksDataException("Invalid input data"); } } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java index 69db7f3..cc5b68c 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java +++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java @@ -41,11 +41,12 @@ public class ByteArrayBase64ParserFactory implements IValueParserFactory { ByteArraySerializerDeserializer serializer = ByteArraySerializerDeserializer.INSTANCE; @Override - public void parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException { parser.generatePureByteArrayFromBase64String(input, start, length); try { serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out); + return true; } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java index c15b2ff..2191c75 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java @@ -40,10 +40,11 @@ public class ByteArrayHexParserFactory implements IValueParserFactory { ByteArraySerializerDeserializer serializer = ByteArraySerializerDeserializer.INSTANCE; @Override - public void parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException { try { parser.generateByteArrayFromHexString(input, start, length); serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out); + return true; } catch (IOException e) { throw 
HyracksDataException.create(e); } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java index 8998798..2004397 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java @@ -35,12 +35,13 @@ public class DoubleParserFactory implements IValueParserFactory { public IValueParser createValueParser() { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { String s = String.valueOf(buffer, start, length); try { out.writeDouble(Double.parseDouble(s)); + return true; } catch (NumberFormatException e) { - throw HyracksDataException.create(e); + return false; } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java index 414946c..2b476f1 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java @@ -35,12 +35,13 @@ public class FloatParserFactory implements IValueParserFactory { public IValueParser 
createValueParser() { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { String s = String.valueOf(buffer, start, length); try { out.writeFloat(Float.parseFloat(s)); + return true; } catch (NumberFormatException e) { - throw HyracksDataException.create(e); + return false; } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java index 0fcfa90..7b0090a 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java @@ -23,5 +23,5 @@ import java.io.DataOutput; import org.apache.hyracks.api.exceptions.HyracksDataException; public interface IValueParser { - void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException; + boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException; } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java index c6cffb4..450aa70 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java +++ 
b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java @@ -35,91 +35,61 @@ public class IntegerParserFactory implements IValueParserFactory { public IValueParser createValueParser() { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { - int n = 0; - int sign = 1; - int i = 0; - boolean pre = true; - for (; pre && i < length; ++i) { - char ch = buffer[i + start]; - switch (ch) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\f': - break; - - case '-': - sign = -1; - pre = false; - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - pre = false; - n = n * 10 + (ch - '0'); - break; - - default: - String errorString = new String(buffer, i + start, length - i); - throw new HyracksDataException( - "Integer Parser - a digit is expected. 
But, encountered this character: " + ch - + " in the incoming input: " + errorString); + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + // accumulating negatively like Integer.parse() to avoid surprises near MAX_VALUE + char c; + int i = start; + int end = start + length; + while (i < end && ((c = buffer[i]) == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f')) { + i++; + } + boolean negative = false; + int limit = -Integer.MAX_VALUE; + if (i < end) { + c = buffer[i]; + if (c == '-') { + negative = true; + limit = Integer.MIN_VALUE; + i++; + } + if (c == '+') { + i++; } } - boolean post = false; - for (; !post && i < length; ++i) { - char ch = buffer[i + start]; - switch (ch) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - n = n * 10 + (ch - '0'); - break; - default: - String errorString = new String(buffer, i + start, length - i); - throw new HyracksDataException( - "Integer Parser - a digit is expected. But, encountered this character: " + ch - + " in the incoming input: " + errorString); + int result = 0; + int multiplicationMin = limit / 10; + boolean gotNumber = false; + for (; i < end; i++) { + c = buffer[i]; + if (c >= '0' && c <= '9') { + gotNumber = true; + if (result < multiplicationMin) { + return false; + } + result *= 10; + int digit = c - '0'; + if (result < limit + digit) { + return false; + } + result -= digit; + } else { + break; } } - for (; i < length; ++i) { - char ch = buffer[i + start]; - switch (ch) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\f': - break; - - default: - String errorString = new String(buffer, i + start, length - i); - throw new HyracksDataException("Integer Parser - a whitespace, tab, new line, or " - + "form-feed expected. 
But, encountered this character: " + ch - + " in the incoming input: " + errorString); + for (; i < end; ++i) { + c = buffer[i]; + if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f') { + return false; } } + if (!gotNumber) { + return false; + } try { - out.writeInt(n * sign); + out.writeInt(negative ? result : -result); + return true; } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java index fd47475..2cd350c 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java @@ -35,92 +35,61 @@ public class LongParserFactory implements IValueParserFactory { public IValueParser createValueParser() { return new IValueParser() { @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { - long n = 0; - int sign = 1; - int i = 0; - boolean pre = true; - for (; pre && i < length; ++i) { - char ch = buffer[i + start]; - switch (ch) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\f': - break; - - case '-': - sign = -1; - pre = false; - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - pre = false; - n = n * 10 + (ch - '0'); - break; - - default: - String errorString = new String(buffer, i + start, length - i); - throw new HyracksDataException( - "Long Parser - a digit is expected. 
But, encountered this character: " + ch - + " in the incoming input: " + errorString); + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + // accumulating negatively like Long.parse() to avoid surprises near MAX_VALUE + char c; + int i = start; + int end = start + length; + while (i < end && ((c = buffer[i]) == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f')) { + i++; + } + boolean negative = false; + long limit = -Long.MAX_VALUE; + if (i < end) { + c = buffer[i]; + if (c == '-') { + negative = true; + limit = Long.MIN_VALUE; + i++; + } + if (c == '+') { + i++; } } - boolean post = false; - for (; !post && i < length; ++i) { - char ch = buffer[i + start]; - switch (ch) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - n = n * 10 + (ch - '0'); - break; - default: - String errorString = new String(buffer, i + start, length - i); - throw new HyracksDataException( - "Long Parser - a digit is expected. But, encountered this character: " + ch - + " in the incoming input: " + errorString); + long result = 0; + long multiplicationMin = limit / 10; + boolean gotNumber = false; + for (; i < end; i++) { + c = buffer[i]; + if (c >= '0' && c <= '9') { + gotNumber = true; + if (result < multiplicationMin) { + return false; + } + result *= 10; + int digit = c - '0'; + if (result < limit + digit) { + return false; + } + result -= digit; + } else { + break; } } - for (; i < length; ++i) { - char ch = buffer[i + start]; - switch (ch) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\f': - break; - - default: - String errorString = new String(buffer, i + start, length - i); - throw new HyracksDataException( - "Long Parser - a whitespace, tab, new line, or form-feed expected. 
" - + "But, encountered this character: " + ch + " in the incoming input: " - + errorString); + for (; i < end; ++i) { + c = buffer[i]; + if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f') { + return false; } } + if (!gotNumber) { + return false; + } try { - out.writeLong(n * sign); + out.writeLong(negative ? result : -result); + return true; } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java index 7848500..22f98a6 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java @@ -38,9 +38,10 @@ public class UTF8StringParserFactory implements IValueParserFactory { private UTF8StringWriter writer = new UTF8StringWriter(); @Override - public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { + public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException { try { writer.writeUTF8(buffer, start, length, out); + return true; } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java index 29a6d6d..5fcaf65 100644 --- 
a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java @@ -66,7 +66,8 @@ public class ByteArrayBase64ParserFactoryTest extends TestCase { DataOutputStream outputStream = new DataOutputStream(bos); ByteArrayPointable bytePtr = new ByteArrayPointable(); - parser.parse(test.toCharArray(), 0, test.length(), outputStream); + boolean result = parser.parse(test.toCharArray(), 0, test.length(), outputStream); + assertTrue(result); bytePtr.set(bos.toByteArray(), 0, bos.size()); byte[] answer = DatatypeConverter.parseBase64Binary(test); diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java index c959c8d..a6196e7 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java @@ -54,7 +54,8 @@ public class ByteArrayHexParserFactoryTest { DataOutputStream outputStream = new DataOutputStream(bos); ByteArrayPointable bytePtr = new ByteArrayPointable(); - parser.parse(test.toCharArray(), 0, test.length(), outputStream); + boolean result = parser.parse(test.toCharArray(), 0, test.length(), outputStream); + assertTrue(result); bytePtr.set(bos.toByteArray(), 0, bos.size()); diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ParserFactoryTest.java 
b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ParserFactoryTest.java new file mode 100644 index 0000000..e3e77da --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ParserFactoryTest.java @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hyracks.dataflow.common.data.parsers; + +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.primitive.BooleanPointable; +import org.apache.hyracks.data.std.primitive.IntegerPointable; +import org.apache.hyracks.data.std.primitive.LongPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.junit.Test; + +import junit.framework.TestCase; + +public class ParserFactoryTest extends TestCase { + + private final ArrayBackedValueStorage storage = new ArrayBackedValueStorage(); + private final IValueParser integerParser = IntegerParserFactory.INSTANCE.createValueParser(); + private final IValueParser longParser = LongParserFactory.INSTANCE.createValueParser(); + private final IValueParser booleanParser = BooleanParserFactory.INSTANCE.createValueParser(); + private String chars = ""; + + @Test + public void testInteger() throws HyracksDataException { + String number = "12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = "+12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = "-12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, -12, true); + number = " 12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = " +12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = " -12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, -12, true); + number = "12 "; + parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = "+12 "; + parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = "-12 "; + parse(number, integerParser, storage, IntegerPointable::getInteger, -12, true); + number = " 12 "; + parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = " +12 "; + 
parse(number, integerParser, storage, IntegerPointable::getInteger, 12, true); + number = " -12 "; + parse(number, integerParser, storage, IntegerPointable::getInteger, -12, true); + + number = Integer.toString(Integer.MAX_VALUE); + parse(number, integerParser, storage, IntegerPointable::getInteger, Integer.MAX_VALUE, true); + number = Integer.toString(Integer.MIN_VALUE); + parse(number, integerParser, storage, IntegerPointable::getInteger, Integer.MIN_VALUE, true); + + // overflow and underflow + number = Long.toString(Integer.MAX_VALUE + 1L); + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = Long.toString(Integer.MIN_VALUE - 1L); + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + + // invalid + number = "a"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = "12a"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = "12 a"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = " a 12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = "a12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = "+ 12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = "- 12"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + number = "1 2"; + parse(number, integerParser, storage, IntegerPointable::getInteger, null, false); + } + + @Test + public void testLong() throws HyracksDataException { + storage.reset(); + String number = "12"; + parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = "+12"; + parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = "-12"; + parse(number, longParser, storage, LongPointable::getLong, -12L, true); + number = " 12"; + 
parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = " +12"; + parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = " -12"; + parse(number, longParser, storage, LongPointable::getLong, -12L, true); + number = "12 "; + parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = "+12 "; + parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = "-12 "; + parse(number, longParser, storage, LongPointable::getLong, -12L, true); + number = " 12 "; + parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = " +12 "; + parse(number, longParser, storage, LongPointable::getLong, 12L, true); + number = " -12 "; + parse(number, longParser, storage, LongPointable::getLong, -12L, true); + + number = Long.toString(Long.MAX_VALUE); + parse(number, longParser, storage, LongPointable::getLong, Long.MAX_VALUE, true); + number = Long.toString(Long.MIN_VALUE); + parse(number, longParser, storage, LongPointable::getLong, Long.MIN_VALUE, true); + + // overflow and underflow + number = Long.toString(Long.MAX_VALUE) + "1"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + number = Long.toString(Long.MIN_VALUE) + "1"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + + // invalid + number = "a"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + number = "12a"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + number = "12 a"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + number = " a 12"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + number = "+ 12"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + number = "- 12"; + parse(number, longParser, storage, LongPointable::getLong, null, false); + number = "1 2"; + parse(number, longParser, storage, LongPointable::getLong, 
null, false); + } + + @Test + public void testBoolean() throws HyracksDataException { + storage.reset(); + String bool = "true"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.TRUE, true); + bool = "TRUE"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.TRUE, true); + bool = "True"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.TRUE, true); + bool = "true "; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.TRUE, true); + bool = " true"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.TRUE, true); + bool = " True "; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.TRUE, true); + + bool = "false"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.FALSE, true); + bool = "FALSE"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.FALSE, true); + bool = "False"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.FALSE, true); + bool = " false "; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.FALSE, true); + bool = " false"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.FALSE, true); + bool = "false "; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, Boolean.FALSE, true); + + // invalid + bool = "foo"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, null, false); + bool = "truea"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, null, false); + bool = "ffalse"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, null, false); + bool = "ffalse"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, null, false); + bool = "t rue"; + parse(bool, booleanParser, storage, BooleanPointable::getBoolean, null, false); + bool = "true a"; + parse(bool, booleanParser, storage, 
BooleanPointable::getBoolean, null, false); + } + + private <T> void parse(String test, IValueParser parser, ArrayBackedValueStorage storage, Getter<T> getter, + T expectedVal, boolean expectedResult) throws HyracksDataException { + int oldSize = storage.getLength(); + int start = storage.getLength(); + int stringStart = chars.length(); + chars = chars + test; + int stringLength = chars.length() - stringStart; + boolean result = parser.parse(chars.toCharArray(), stringStart, stringLength, storage.getDataOutput()); + int newSize = storage.getLength(); + if (!result) { + assertEquals(oldSize, newSize); + } else { + assertEquals(expectedVal, getter.get(storage.getByteArray(), start)); + } + assertEquals(expectedResult, result); + + } + + @FunctionalInterface + private interface Getter<T> { + T get(byte[] bytes, int start); + } +} diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java index e91992d..b8b2ba8 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java @@ -92,8 +92,10 @@ public class DelimitedDataTupleParserFactory implements ITupleParserFactory { if (cursor.fieldHasDoubleQuote()) { cursor.eliminateDoubleQuote(); } - valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(), cursor.getFieldLength(), - dos); + if (!valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(), + cursor.getFieldLength(), dos)) { + throw new HyracksDataException("Failed to parse field"); + } tb.addFieldEndOffset(); } FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, diff 
--git a/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java b/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java index 2219a14..0032c40 100644 --- a/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java @@ -54,7 +54,9 @@ public class WordTupleParserFactory implements ITupleParserFactory { WordCursor cursor = new WordCursor(new InputStreamReader(in)); while (cursor.nextWord()) { tb.reset(); - utf8StringParser.parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart, dos); + if (!utf8StringParser.parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart, dos)) { + throw new HyracksDataException("Failed to parse word"); + } tb.addFieldEndOffset(); FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
