This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 7bd62f396c7052659e561314dee8e74baf00fb36 Author: Ali Alsuliman <[email protected]> AuthorDate: Fri Nov 7 18:51:11 2025 -0800 [ASTERIXDB-3672][EXT] Fix reading array null elements - user model changes: no - storage format changes: no - interface changes: no Ext-ref: MB-66766 Change-Id: I0774b8003a12909ed4d0a6f4201da0e65c181055 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20564 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Reviewed-by: Preetham Poluparthi <[email protected]> Tested-by: Preetham Poluparthi <[email protected]> --- .../data/parquet/01/array_of_primitives.parquet | Bin 0 -> 549 bytes .../01/array_of_single_field_struct.parquet | Bin 0 -> 570 bytes .../parquet/01/array_of_structs_with_nulls.parquet | Bin 0 -> 897 bytes .../data/parquet/01/repeated_field.parquet | Bin 0 -> 511 bytes .../data/parquet/01/repeated_struct.parquet | Bin 0 -> 705 bytes .../external_dataset/ExternalDatasetTestUtils.java | 13 + .../parquet/BinaryFileConverterUtil.java | 1 + .../parquet-null3/parquet-null3.01.ddl.sqlpp | 28 + .../parquet-null3/parquet-null3.02.update.sqlpp | 1735 ++++++++++++++++++++ .../parquet-null3/parquet-null3.03.ddl.sqlpp | 31 + .../parquet-null3/parquet-null3.04.query.sqlpp | 25 + .../null-in-array/null-in-array.01.ddl.sqlpp | 35 + .../null-in-array/null-in-array.02.query.sqlpp | 22 + .../null-in-array/null-in-array.99.ddl.sqlpp | 20 + .../parquet-null-type/parquet-null-type.05.adm | 10 +- .../parquet-null-type/parquet-null-type.08.adm | 10 +- .../copy-to/parquet-null2/parquet-null2.04.adm | 2 +- .../copy-to/parquet-null3/parquet-null3.04.adm | 50 + .../parquet-type-hierarchy.05.adm | 2 +- .../parquet/embed-one-value/one-field.110.adm | 2 +- .../array-access-pushdown.02.adm | 2 +- .../array-access-pushdown.04.adm | 2 +- .../array-access-pushdown.06.adm | 1 + .../array-access-pushdown.08.adm | 1 + .../parquet/missing-fields/missing-fields.3.adm | 4 +- .../parquet/null-in-array/null-in-array.02.adm | 10 + .../parquet-types/unset-flags/unset-flags.02.adm | 2 +- .../select-all-fields/select-all-fields.2.adm | 2 +- .../select-all-fields/select-all-fields.3.adm | 2 +- .../parquet/type-mismatch/type-mismatch.02.adm | 4 +- .../parquet/type-mismatch/type-mismatch.04.adm | 4 +- .../runtimets/testsuite_external_dataset_s3.xml | 16 + .../parquet/converter/ParquetConverterContext.java | 12 + .../parquet/converter/nested/ObjectConverter.java | 32 + .../converter/nested/ObjectRepeatedConverter.java | 32 +- .../converter/nested/RepeatedConverter.java | 11 +- .../primitve/PrimitiveRepeatedConverter.java | 2 +- 37 files changed, 2096 insertions(+), 29 deletions(-) diff --git a/asterixdb/asterix-app/data/parquet/01/array_of_primitives.parquet b/asterixdb/asterix-app/data/parquet/01/array_of_primitives.parquet new file mode 100644 index 0000000000..83ea34678e Binary files /dev/null and b/asterixdb/asterix-app/data/parquet/01/array_of_primitives.parquet differ diff --git a/asterixdb/asterix-app/data/parquet/01/array_of_single_field_struct.parquet b/asterixdb/asterix-app/data/parquet/01/array_of_single_field_struct.parquet new file mode 100644 index 0000000000..0f52897c9c Binary files /dev/null and b/asterixdb/asterix-app/data/parquet/01/array_of_single_field_struct.parquet differ diff --git a/asterixdb/asterix-app/data/parquet/01/array_of_structs_with_nulls.parquet b/asterixdb/asterix-app/data/parquet/01/array_of_structs_with_nulls.parquet new file mode 100644 index 0000000000..85414329f7 Binary files /dev/null and b/asterixdb/asterix-app/data/parquet/01/array_of_structs_with_nulls.parquet differ diff --git a/asterixdb/asterix-app/data/parquet/01/repeated_field.parquet b/asterixdb/asterix-app/data/parquet/01/repeated_field.parquet new file mode 100644 index 0000000000..a99eba6879 Binary files /dev/null and b/asterixdb/asterix-app/data/parquet/01/repeated_field.parquet differ diff --git a/asterixdb/asterix-app/data/parquet/01/repeated_struct.parquet b/asterixdb/asterix-app/data/parquet/01/repeated_struct.parquet new file mode 100644 index 0000000000..c25d524905 Binary files /dev/null and b/asterixdb/asterix-app/data/parquet/01/repeated_struct.parquet differ diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java index d31fb66a27..8a94453d2f 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java @@ -25,6 +25,7 @@ import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetT import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.FIXED_DATA_CONTAINER; import static org.apache.asterix.test.external_dataset.deltalake.DeltaTableGenerator.DELTA_GEN_BASEDIR; import static org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil.BINARY_GEN_BASEDIR; +import static org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil.PARQUET_BASEDIR; import java.io.BufferedWriter; import java.io.File; @@ -72,6 +73,7 @@ public class ExternalDatasetTestUtils { public static final String TSV_DEFINITION = "tsv-data/reviews/"; public static final String MIXED_DEFINITION = "mixed-data/reviews/"; public static final String PARQUET_DEFINITION = "parquet-data/reviews/"; + public static final String PARQUET_NULL_TEST_DIRECTORY = "parquet-data/null-test/"; public static final String AVRO_DEFINITION = "avro-data/reviews/"; // This is used for a test to generate over 1000 number of files @@ -398,6 +400,17 @@ public class ExternalDatasetTestUtils { loadData(generatedDataBasePath, "", "parquetTypes.parquet", definition, definitionSegment, false, false); loadData(generatedDataBasePath, "", "friends.parquet", definition, definitionSegment, false, false); + loadData(PARQUET_BASEDIR, "", "array_of_primitives.parquet", PARQUET_NULL_TEST_DIRECTORY, definitionSegment, + false, false); + loadData(PARQUET_BASEDIR, "", "array_of_single_field_struct.parquet", PARQUET_NULL_TEST_DIRECTORY, + definitionSegment, false, false); + loadData(PARQUET_BASEDIR, "", "array_of_structs_with_nulls.parquet", PARQUET_NULL_TEST_DIRECTORY, + definitionSegment, false, false); + loadData(PARQUET_BASEDIR, "", "repeated_field.parquet", PARQUET_NULL_TEST_DIRECTORY, definitionSegment, false, + false); + loadData(PARQUET_BASEDIR, "", "repeated_struct.parquet", PARQUET_NULL_TEST_DIRECTORY, definitionSegment, false, + false); + Collection<File> files = IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath + "/external-filter"), PARQUET_FILTER); for (File file : files) { diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java index 93ac24eedc..3b602dc28d 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/parquet/BinaryFileConverterUtil.java @@ -39,6 +39,7 @@ import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class BinaryFileConverterUtil { public static final String DEFAULT_PARQUET_SRC_PATH = "data/hdfs/parquet"; public static final String BINARY_GEN_BASEDIR = "target" + File.separatorChar + "generated_bin_files"; + public static final String PARQUET_BASEDIR = "data" + File.separatorChar + "parquet" + File.separatorChar + "01"; //How many records should the schema inference method inspect to infer the schema for parquet files private static final int NUM_OF_RECORDS_SCHEMA = 20; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.01.ddl.sqlpp new file mode 100644 index 0000000000..8bac75ecc2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.01.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test if exists; +CREATE DATAVERSE test; +USE test; + +CREATE TYPE ColumnType2 AS { + }; + + +CREATE DATASET col2 primary key(id:int); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.02.update.sqlpp new file mode 100644 index 0000000000..0e580c2c6c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.02.update.sqlpp @@ -0,0 +1,1735 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + + +INSERT INTO col2 ( + [ +{ + "id": 1, + "name": "Alice Johnson", + "age": 28, + "address": { + "street": "123 Main St", + "city": "New York", + "coordinates": { + "lat": 40.7128, + "lon": -74.0060 + } + }, + "tags": ["premium", "verified", "active"], + "orders": [ + { + "orderId": 1001, + "items": [ + { + "productId": 501, + "quantity": 2, + "metadata": { + "category": "electronics", + "attributes": { + "color": "black", + "size": "medium" + } + } + } + ], + "total": 299.99 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": false + }, + "theme": "dark" + }, + "scores": [85.5, 92.3, 78.9] + }, + { + "id": 2, + "name": "Bob Smith", + "age": null, + "address": { + "street": null, + "city": "Los Angeles", + "coordinates": { + "lat": 34.0522, + "lon": null + } + }, + "tags": ["active"], + "orders": [], + "preferences": { + "notifications": { + "email": null, + "sms": true + }, + "theme": null + }, + "scores": [88.0, null, 95.5] + }, + { + "id": 3, + "name": "Carol Davis", + "age": 35, + "address": null, + "tags": [], + "orders": [ + { + "orderId": 1002, + "items": [ + { + "productId": null, + "quantity": 1, + "metadata": { + "category": "books", + "attributes": null + } + } + ], + "total": 15.99 + } + ], + "preferences": { + "notifications": null, + "theme": "light" + }, + "scores": [] + }, + { + "id": 4, + "name": "David Wilson", + "age": 42, + "address": { + "street": "456 Oak Ave", + "city": "Chicago", + "coordinates": null + }, + "tags": null, + "orders": [ + { + "orderId": 1003, + "items": [], + "total": 0.0 + } + ], + "preferences": null, + "scores": [70.5, 75.0, 80.5, 85.0] + }, + { + "id": 5, + "name": "Emma Brown", + "age": 31, + "address": { + "street": "789 Pine Rd", + "city": "Houston", + "coordinates": { + "lat": null, + "lon": null + } + }, + "tags": ["verified", null, "new"], + "orders": [ + { + "orderId": 1004, + "items": [ + { + "productId": 502, + "quantity": null, + "metadata": null + } + ], + "total": null + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": false + }, + "theme": "auto" + }, + "scores": null + }, + { + "id": 6, + "name": "Frank Miller", + "age": 29, + "address": { + "street": "321 Elm St", + "city": "Phoenix", + "coordinates": { + "lat": 33.4484, + "lon": -112.0740 + } + }, + "tags": ["premium"], + "orders": [ + { + "orderId": 1005, + "items": [ + { + "productId": 503, + "quantity": 3, + "metadata": { + "category": null, + "attributes": { + "color": "blue", + "size": null + } + } + }, + null + ], + "total": 450.00 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": true + }, + "theme": "dark" + }, + "scores": [90.0, 85.5] + }, + { + "id": 7, + "name": "Grace Lee", + "age": null, + "address": { + "street": "654 Maple Dr", + "city": "Philadelphia", + "coordinates": { + "lat": 39.9526, + "lon": -75.1652 + } + }, + "tags": [], + "orders": null, + "preferences": { + "notifications": { + "email": null, + "sms": null + }, + "theme": "light" + }, + "scores": [null, null, null] + }, + { + "id": 8, + "name": "Henry Garcia", + "age": 38, + "address": { + "street": null, + "city": "San Antonio", + "coordinates": null + }, + "tags": ["active", "verified"], + "orders": [ + { + "orderId": 1006, + "items": [ + { + "productId": 504, + "quantity": 1, + "metadata": { + "category": "clothing", + "attributes": { + "color": null, + "size": "large" + } + } + }, + { + "productId": 505, + "quantity": 2, + "metadata": { + "category": "accessories", + "attributes": { + "color": "red", + "size": "small" + } + } + } + ], + "total": 125.50 + }, + null + ], + "preferences": { + "notifications": { + "email": true, + "sms": false + }, + "theme": null + }, + "scores": [77.5] + }, + { + "id": 9, + "name": "Ivy Martinez", + "age": 26, + "address": { + "street": "987 Cedar Ln", + "city": "San Diego", + "coordinates": { + "lat": 32.7157, + "lon": -117.1611 + } + }, + "tags": ["new"], + "orders": [ + { + "orderId": 1007, + "items": [ + { + "productId": null, + "quantity": null, + "metadata": { + "category": null, + "attributes": null + } + } + ], + "total": 0.0 + } + ], + "preferences": null, + "scores": [] + }, + { + "id": 10, + "name": "Jack Robinson", + "age": 45, + "address": null, + "tags": ["premium", "verified", "active", "vip"], + "orders": [ + { + "orderId": 1008, + "items": [ + { + "productId": 506, + "quantity": 5, + "metadata": { + "category": "electronics", + "attributes": { + "color": "silver", + "size": "extra-large" + } + } + } + ], + "total": 999.99 + }, + { + "orderId": 1009, + "items": [], + "total": null + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": true + }, + "theme": "dark" + }, + "scores": [95.0, 98.5, 92.0, 89.5] + }, + { + "id": 11, + "name": "Karen White", + "age": null, + "address": { + "street": "147 Birch St", + "city": "Dallas", + "coordinates": { + "lat": null, + "lon": -96.7970 + } + }, + "tags": null, + "orders": [], + "preferences": { + "notifications": null, + "theme": "auto" + }, + "scores": [null] + }, + { + "id": 12, + "name": "Leo Harris", + "age": 33, + "address": { + "street": "258 Spruce Ave", + "city": "San Jose", + "coordinates": { + "lat": 37.3382, + "lon": null + } + }, + "tags": ["active"], + "orders": [ + { + "orderId": 1010, + "items": null, + "total": 50.00 + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": null + }, + "theme": "light" + }, + "scores": [82.5, 87.0] + }, + { + "id": 13, + "name": "Mia Clark", + "age": 27, + "address": { + "street": "369 Willow Way", + "city": "Austin", + "coordinates": { + "lat": 30.2672, + "lon": -97.7431 + } + }, + "tags": ["verified", "new"], + "orders": [ + { + "orderId": 1011, + "items": [ + { + "productId": 507, + "quantity": 1, + "metadata": { + "category": "home", + "attributes": { + "color": "white", + "size": null + } + } + }, + { + "productId": null, + "quantity": 2, + "metadata": null + } + ], + "total": 175.25 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": false + }, + "theme": "dark" + }, + "scores": null + }, + { + "id": 14, + "name": "Noah Lewis", + "age": 40, + "address": null, + "tags": [], + "orders": null, + "preferences": null, + "scores": [] + }, + { + "id": 15, + "name": "Olivia Walker", + "age": null, + "address": { + "street": null, + "city": "Jacksonville", + "coordinates": null + }, + "tags": ["premium"], + "orders": [ + { + "orderId": 1012, + "items": [ + { + "productId": 508, + "quantity": 4, + "metadata": { + "category": "sports", + "attributes": { + "color": "green", + "size": "medium" + } + } + } + ], + "total": 320.00 + } + ], + "preferences": { + "notifications": { + "email": null, + "sms": true + }, + "theme": null + }, + "scores": [88.0, 91.5, null, 86.0] + }, + { + "id": 16, + "name": "Peter Hall", + "age": 36, + "address": { + "street": "741 Ash Blvd", + "city": "Fort Worth", + "coordinates": { + "lat": 32.7555, + "lon": -97.3308 + } + }, + "tags": ["active", null], + "orders": [], + "preferences": { + "notifications": { + "email": false, + "sms": false + }, + "theme": "auto" + }, + "scores": [null, 79.5, 84.0] + }, + { + "id": 17, + "name": "Quinn Allen", + "age": 32, + "address": { + "street": "852 Poplar Ct", + "city": "Columbus", + "coordinates": { + "lat": 39.9612, + "lon": null + } + }, + "tags": null, + "orders": [ + { + "orderId": 1013, + "items": [ + { + "productId": 509, + "quantity": null, + "metadata": { + "category": null, + "attributes": { + "color": null, + "size": null + } + } + } + ], + "total": null + } + ], + "preferences": { + "notifications": null, + "theme": "light" + }, + "scores": [] + }, + { + "id": 18, + "name": "Rachel Young", + "age": 29, + "address": { + "street": "963 Fir Rd", + "city": "Charlotte", + "coordinates": { + "lat": null, + "lon": null + } + }, + "tags": ["verified"], + "orders": [ + { + "orderId": 1014, + "items": [ + { + "productId": 510, + "quantity": 3, + "metadata": { + "category": "electronics", + "attributes": null + } + }, + null, + { + "productId": 511, + "quantity": 1, + "metadata": { + "category": "books", + "attributes": { + "color": null, + "size": "standard" + } + } + } + ], + "total": 215.75 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": true + }, + "theme": "dark" + }, + "scores": [93.5, 89.0] + }, + { + "id": 19, + "name": "Samuel King", + "age": null, + "address": null, + "tags": [], + "orders": [ + { + "orderId": 1015, + "items": [], + "total": 0.0 + } + ], + "preferences": null, + "scores": null + }, + { + "id": 20, + "name": "Tina Wright", + "age": 34, + "address": { + "street": "159 Hickory Pl", + "city": "Indianapolis", + "coordinates": { + "lat": 39.7684, + "lon": -86.1581 + } + }, + "tags": ["premium", "active"], + "orders": [ + { + "orderId": 1016, + "items": [ + { + "productId": 512, + "quantity": 2, + "metadata": { + "category": "clothing", + "attributes": { + "color": "yellow", + "size": "small" + } + } + } + ], + "total": 89.99 + }, + { + "orderId": 1017, + "items": [ + { + "productId": null, + "quantity": null, + "metadata": null + } + ], + "total": null + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": true + }, + "theme": "light" + }, + "scores": [76.5, null, 81.0] + }, + { + "id": 21, + "name": "Uma Scott", + "age": 41, + "address": { + "street": null, + "city": "Seattle", + "coordinates": { + "lat": 47.6062, + "lon": null + } + }, + "tags": ["new", "verified"], + "orders": null, + "preferences": { + "notifications": { + "email": null, + "sms": null + }, + "theme": null + }, + "scores": [90.5] + }, + { + "id": 22, + "name": "Victor Green", + "age": 37, + "address": { + "street": "753 Walnut St", + "city": "Denver", + "coordinates": null + }, + "tags": null, + "orders": [ + { + "orderId": 1018, + "items": [ + { + "productId": 513, + "quantity": 5, + "metadata": { + "category": "toys", + "attributes": { + "color": "multi", + "size": null + } + } + }, + { + "productId": 514, + "quantity": 1, + "metadata": { + "category": null, + "attributes": null + } + } + ], + "total": 425.50 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": false + }, + "theme": "auto" + }, + "scores": [] + }, + { + "id": 23, + "name": "Wendy Baker", + "age": null, + "address": { + "street": "864 Cherry Ave", + "city": "Boston", + "coordinates": { + "lat": 42.3601, + "lon": -71.0589 + } + }, + "tags": ["active"], + "orders": [], + "preferences": null, + "scores": [null, null] + }, + { + "id": 24, + "name": "Xavier Adams", + "age": 30, + "address": { + "street": "975 Beech Dr", + "city": "Nashville", + "coordinates": { + "lat": null, + "lon": -86.7816 + } + }, + "tags": ["premium", null, "verified"], + "orders": [ + { + "orderId": 1019, + "items": null, + "total": 150.00 + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": null + }, + "theme": "dark" + }, + "scores": [85.0, 88.5, 92.0] + }, + { + "id": 25, + "name": "Yara Nelson", + "age": 28, + "address": null, + "tags": [], + "orders": [ + { + "orderId": 1020, + "items": [ + { + "productId": 515, + "quantity": 1, + "metadata": { + "category": "food", + "attributes": { + "color": null, + "size": null + } + } + } + ], + "total": 25.99 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": true + }, + "theme": "light" + }, + "scores": null + }, + { + "id": 26, + "name": "Zack Carter", + "age": 43, + "address": { + "street": "147 Redwood Ln", + "city": "Detroit", + "coordinates": { + "lat": 42.3314, + "lon": -83.0458 + } + }, + "tags": ["verified", "vip"], + "orders": [ + { + "orderId": 1021, + "items": [ + { + "productId": 516, + "quantity": 2, + "metadata": { + "category": "electronics", + "attributes": { + "color": "black", + "size": "large" + } + } + }, + { + "productId": 517, + "quantity": null, + "metadata": { + "category": "accessories", + "attributes": null + } + } + ], + "total": 550.00 + }, + null + ], + "preferences": { + "notifications": { + "email": null, + "sms": false + }, + "theme": null + }, + "scores": [97.5, 94.0, 91.5] + }, + { + "id": 27, + "name": "Amy Mitchell", + "age": null, + "address": { + "street": null, + "city": "Memphis", + "coordinates": null + }, + "tags": null, + "orders": [], + "preferences": { + "notifications": null, + "theme": "auto" + }, + "scores": [] + }, + { + "id": 28, + "name": "Ben Perez", + "age": 35, + "address": { + "street": "258 Magnolia Rd", + "city": "Portland", + "coordinates": { + "lat": 45.5152, + "lon": null + } + }, + "tags": ["active", "new"], + "orders": [ + { + "orderId": 1022, + "items": [ + { + "productId": null, + "quantity": 3, + "metadata": null + } + ], + "total": null + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": false + }, + "theme": "dark" + }, + "scores": [80.0, 83.5] + }, + { + "id": 29, + "name": "Clara Roberts", + "age": 39, + "address": { + "street": "369 Sycamore Way", + "city": "Las Vegas", + "coordinates": { + "lat": 36.1699, + "lon": -115.1398 + } + }, + "tags": ["premium"], + "orders": [ + { + "orderId": 1023, + "items": [ + { + "productId": 518, + "quantity": 1, + "metadata": { + "category": "jewelry", + "attributes": { + "color": "gold", + "size": "one-size" + } + } + }, + null, + { + "productId": 519, + "quantity": 2, + "metadata": { + "category": "jewelry", + "attributes": { + "color": "silver", + "size": null + } + } + } + ], + "total": 850.00 + } + ], + "preferences": null, + "scores": [null, 95.0, null] + }, + { + "id": 30, + "name": "Derek Turner", + "age": 31, + "address": null, + "tags": [], + "orders": null, + "preferences": { + "notifications": { + "email": false, + "sms": false + }, + "theme": "light" + }, + "scores": [78.5, 82.0, 85.5, 87.0] + }, + { + "id": 31, + "name": "Eva Phillips", + "age": null, + "address": { + "street": "741 Cottonwood Dr", + "city": "Baltimore", + "coordinates": { + "lat": null, + "lon": null + } + }, + "tags": ["verified"], + "orders": [ + { + "orderId": 1024, + "items": [], + "total": 0.0 + } + ], + "preferences": { + "notifications": { + "email": null, + "sms": true + }, + "theme": null + }, + "scores": null + }, + { + "id": 32, + "name": "Fred Campbell", + "age": 44, + "address": { + "street": "852 Dogwood Ave", + "city": "Milwaukee", + "coordinates": { + "lat": 43.0389, + "lon": -87.9065 + } + }, + "tags": null, + "orders": [ + { + "orderId": 1025, + "items": [ + { + "productId": 520, + "quantity": 4, + "metadata": { + "category": "garden", + "attributes": null + } + } + ], + "total": 199.99 + }, + { + "orderId": 1026, + "items": null, + "total": 75.50 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": true + }, + "theme": "auto" + }, + "scores": [89.0] + }, + { + "id": 33, + "name": "Gina Parker", + "age": 26, + "address": { + "street": null, + "city": "Albuquerque", + "coordinates": { + "lat": 35.0844, + "lon": null + } + }, + "tags": ["new", null, "active"], + "orders": [], + "preferences": null, + "scores": [] + }, + { + "id": 34, + "name": "Hank Evans", + "age": 38, + "address": { + "street": "963 Laurel Ct", + "city": "Tucson", + "coordinates": null + }, + "tags": ["premium", "verified"], + "orders": [ + { + "orderId": 1027, + "items": [ + { + "productId": 521, + "quantity": 1, + "metadata": { + "category": "automotive", + "attributes": { + "color": "gray", + "size": "universal" + } + } + }, + { + "productId": null, + "quantity": null, + "metadata": { + "category": null, + "attributes": { + "color": null, + "size": null + } + } + } + ], + "total": 375.00 + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": null + }, + "theme": "dark" + }, + "scores": [86.5, 90.0, null] + }, + { + "id": 35, + "name": "Iris Edwards", + "age": null, + "address": null, + "tags": [], + "orders": [ + { + "orderId": 1028, + "items": [ + { + "productId": 522, + "quantity": 2, + "metadata": null + } + ], + "total": 55.00 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": false + }, + "theme": "light" + }, + "scores": [null] + }, + { + "id": 36, + "name": "Jake Collins", + "age": 33, + "address": { + "street": "159 Juniper Ln", + "city": "Fresno", + "coordinates": { + "lat": 36.7378, + "lon": -119.7871 + } + }, + "tags": ["active"], + "orders": null, + "preferences": { + "notifications": null, + "theme": null + }, + "scores": [92.5, 88.0, 85.5] + }, + { + "id": 37, + "name": "Kara Stewart", + "age": 29, + "address": { + "street": "753 Cypress Rd", + "city": "Sacramento", + "coordinates": { + "lat": null, + "lon": -121.4944 + } + }, + "tags": null, + "orders": [ + { + "orderId": 1029, + "items": [ + { + "productId": 523, + "quantity": 3, + "metadata": { + "category": "office", + "attributes": { + "color": "blue", + "size": null + } + } + }, + null + ], + "total": 125.00 + } + ], + "preferences": { + "notifications": { + "email": null, + "sms": null + }, + "theme": "auto" + }, + "scores": [] + }, + { + "id": 38, + "name": "Liam Sanchez", + "age": 42, + "address": { + "street": "864 Alder Blvd", + "city": "Long Beach", + "coordinates": { + "lat": 33.7701, + "lon": null + } + }, + "tags": ["verified", "premium"], + "orders": [ + { + "orderId": 1030, + "items": [], + "total": null + } + ], + "preferences": null, + "scores": null + }, + { + "id": 39, + "name": "Maya Morris", + "age": null, + "address": { + "street": null, + "city": "Kansas City", + "coordinates": null + }, + "tags": [], + "orders": [ + { + "orderId": 1031, + "items": [ + { + "productId": 524, + "quantity": null, + "metadata": { + "category": "pet", + "attributes": null + } + } + ], + "total": 45.99 + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": true + }, + "theme": "dark" + }, + "scores": [null, null, 87.5] + }, + { + "id": 40, + "name": "Nate Rogers", + "age": 36, + "address": { + "street": "975 Hawthorn Pl", + "city": "Mesa", + "coordinates": { + "lat": 33.4152, + "lon": -111.8315 + } + }, + "tags": ["new"], + "orders": [ + { + "orderId": 1032, + "items": [ + { + "productId": 525, + "quantity": 1, + "metadata": { + "category": "music", + "attributes": { + "color": null, + "size": "standard" + } + } + }, + { + "productId": 526, + "quantity": 2, + "metadata": null + } + ], + "total": 220.00 + }, + null + ], + "preferences": { + "notifications": { + "email": true, + "sms": null + }, + "theme": "light" + }, + "scores": [83.0, 87.5] + }, + { + "id": 41, + "name": "Olive Reed", + "age": 27, + "address": null, + "tags": null, + "orders": [], + "preferences": { + "notifications": { + "email": null, + "sms": false + }, + "theme": null + }, + "scores": [] + }, + { + "id": 42, + "name": "Paul Cook", + "age": null, + "address": { + "street": "147 Hemlock St", + "city": "Atlanta", + "coordinates": { + "lat": 33.7490, + "lon": -84.3880 + } + }, + "tags": ["active", "verified"], + "orders": [ + { + "orderId": 1033, + "items": [ + { + "productId": null, + "quantity": 5, + "metadata": { + "category": null, + "attributes": { + "color": "orange", + "size": null + } + } + } + ], + "total": null + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": true + }, + "theme": "auto" + }, + "scores": [91.0, null, 94.5] + }, + { + "id": 43, + "name": "Quincy Bell", + "age": 34, + "address": { + "street": "258 Sequoia Ave", + "city": "Colorado Springs", + "coordinates": { + "lat": null, + "lon": null + } + }, + "tags": ["premium"], + "orders": null, + "preferences": null, + "scores": [75.5, 80.0, 84.5] + }, + { + "id": 44, + "name": "Rita Morgan", + "age": 40, + "address": { + "street": null, + "city": "Raleigh", + "coordinates": { + "lat": 35.7796, + "lon": null + } + }, + "tags": [], + "orders": [ + { + "orderId": 1034, + "items": [ + { + "productId": 527, + "quantity": 1, + "metadata": { + "category": "beauty", + "attributes": { + "color": "pink", + "size": "travel" + } + } + }, + null, + { + "productId": 528, + "quantity": null, + "metadata": { + "category": "beauty", + "attributes": null + } + } + ], + "total": 135.50 + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": false + }, + "theme": "dark" + }, + "scores": null + }, + { + "id": 45, + "name": "Steve Bailey", + "age": null, + "address": { + "street": "369 Buckeye Dr", + "city": "Omaha", + "coordinates": null + }, + "tags": ["verified", null], + "orders": [ + { + "orderId": 1035, + "items": null, + "total": 99.99 + } + ], + "preferences": { + "notifications": null, + "theme": "light" + }, + "scores": [] + }, + { + "id": 46, + "name": "Tara Rivera", + "age": 32, + "address": { + "street": "741 Chestnut Way", + "city": "Miami", + "coordinates": { + "lat": 25.7617, + "lon": -80.1918 + } + }, + "tags": ["new", "active"], + "orders": [ + { + "orderId": 1036, + "items": [ + { + "productId": 529, + "quantity": 2, + "metadata": { + "category": "fitness", + "attributes": { + "color": null, + "size": "medium" + } + } + } + ], + "total": 180.00 + }, + { + "orderId": 1037, + "items": [], + "total": 0.0 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": false + }, + "theme": null + }, + "scores": [88.5, 92.0] + }, + { + "id": 47, + "name": "Ursula Cooper", + "age": 37, + "address": null, + "tags": null, + "orders": null, + "preferences": null, + "scores": null + }, + { + "id": 48, + "name": "Vince Richardson", + "age": 30, + "address": { + "street": "852 Palmetto Ln", + "city": "Oakland", + "coordinates": { + "lat": 37.8044, + "lon": null + } + }, + "tags": ["premium", "verified"], + "orders": [ + { + "orderId": 1038, + "items": [ + { + "productId": 530, + "quantity": 3, + "metadata": { + "category": "tech", + "attributes": { + "color": "white", + "size": null + } + } + }, + { + "productId": null, + "quantity": 1, + "metadata": null + } + ], + "total": 675.00 + } + ], + "preferences": { + "notifications": { + "email": null, + "sms": true + }, + "theme": "auto" + }, + "scores": [93.0, null, 89.5] + }, + { + "id": 49, + "name": "Wanda Cox", + "age": null, + "address": { + "street": null, + "city": "Minneapolis", + "coordinates": { + "lat": null, + "lon": -93.2650 + } + }, + "tags": [], + "orders": [ + { + "orderId": 1039, + "items": [ + { + "productId": 531, + "quantity": null, + "metadata": { + "category": "craft", + "attributes": { + "color": "multi", + "size": "kit" + } + } + } + ], + "total": null + } + ], + "preferences": { + "notifications": { + "email": false, + "sms": null + }, + "theme": "dark" + }, + "scores": [null, 86.0] + }, + { + "id": 50, + "name": "Xander Howard", + "age": 35, + "address": { + "street": "963 Cedarwood Ct", + "city": "Tulsa", + "coordinates": { + "lat": 36.1540, + "lon": -95.9928 + } + }, + "tags": ["active", "vip", "premium"], + "orders": [ + { + "orderId": 1040, + "items": [ + { + "productId": 532, + "quantity": 10, + "metadata": { + "category": "wholesale", + "attributes": { + "color": "assorted", + "size": "bulk" + } + } + }, + { + "productId": 533, + "quantity": 5, + "metadata": { + "category": "wholesale", + "attributes": { + "color": null, + "size": "bulk" + } + } + } + ], + "total": 1250.00 + }, + { + "orderId": 1041, + "items": null, + "total": 500.00 + } + ], + "preferences": { + "notifications": { + "email": true, + "sms": true + }, + "theme": "dark" + }, + "scores": [98.5, 96.0, 94.5, 92.0] + } + ] +); + +COPY ( +select id, name, age, address, tags, orders, preferences, scores from col2 + ) toWriter +TO %adapter% +PATH (%pathprefix% "copy-to-result", "parquet-null3") +TYPE ( +{ + id : int, + name : string, + age : int, + address : { + street : string, + city : string, + coordinates : { + lat : double, + lon : double + } + }, + tags : [string], + orders : [ + { + orderId : int, + total : double, + items : [ + { + productId : int, + quantity : int, + metadata : { + category : string, + +attributes : { + color : string, + size : string + } + } + } + ] + } + ], + preferences : { + + + notifications : { + email : boolean, + sms : boolean + }, + theme : string + + }, + scores : [double] +} + +) +WITH { + %template_colons%, + %additionalProperties% + "format":"parquet", + "version" : "2" + }; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.03.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.03.ddl.sqlpp new file mode 100644 index 0000000000..5722f0ba50 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.03.ddl.sqlpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + + +CREATE EXTERNAL DATASET DatasetCopy(ColumnType2) USING %adapter% +( + %template%, + %additional_Properties%, + ("definition"="%path_prefix%copy-to-result/parquet-null3"), + ("format" = "parquet"), + ("requireVersionChangeDetection"="false"), + ("include"="*.parquet") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.04.query.sqlpp new file mode 100644 index 0000000000..cd2fa48e1e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null3/parquet-null3.04.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + + +SELECT c.* +FROM DatasetCopy c +ORDER BY id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.01.ddl.sqlpp new file mode 100644 index 0000000000..a06e66f4ed --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.01.ddl.sqlpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + +CREATE TYPE ParquetType as { +}; + +CREATE EXTERNAL DATASET TestDataset(ParquetType) USING %adapter% +( + %template%, + ("container"="playground"), + ("definition"="parquet-data/null-test"), + ("include"="*.parquet"), + ("format" = "parquet") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.02.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.02.query.sqlpp new file mode 100644 index 0000000000..d295b02f4a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.02.query.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + +SELECT VALUE v FROM TestDataset v ORDER BY v.id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.99.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.99.ddl.sqlpp new file mode 100644 index 0000000000..36b2bab543 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/null-in-array/null-in-array.99.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm index 763b652306..9f14133972 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm @@ -1,5 +1,5 @@ -{ "id": 8, "nested": { "List": [ 100, 300 ] }, "obj_array": [ { "first": "first" }, { }, { "first": "second" } ], "a": { }, "arr": [ [ 1, 2 ] ] } -{ "id": 10, "name": "Virat", "nested": { "List": [ ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { }, "c": { }, "f": [ ], "arr": [ [ 1, 2 ], [ ] ] } -{ "id": 28, "name": "Virat", "nested": { "List": [ ], "A": "a" }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, "c": { "d": 1 }, "f": [ 1.0 ], "arr": [ [ 1, 2 ], [ ] ] } -{ "id": 34, "nested": { "randomK": "randomV" }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "arr": [ [ ] ] } -{ "id": 37, "name": "Kohli", "nested": { "List": [ 1, 2, 3 ], "A": "a" }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 3.5999999046325684, 4.0 ], "arr": [ [ 1, 2, 3 ] ] } +{ "id": 8, "nested": { "List": [ 100, null, 300 ], "A": null, "randomK": null }, "obj_array": [ { "first": "first" }, { "first": null }, { "first": "second" } ], "a": { "b": null }, "arr": [ [ 1, null, 2 ], null ], "name": null, "c": null, "f": null } +{ "id": 10, "name": "Virat", "nested": { "List": [ ], "A": null, "randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": null }, "c": { "d": null, "e": null }, "f": [ ], "arr": [ [ 1, 2 ], [ ] ] } +{ "id": 28, "name": "Virat", "nested": { "List": [ null ], "A": "a", "randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, "c": { "d": 1, "e": null }, "f": [ 1.0, null ], "arr": [ [ 1, 2 ], [ null ] ] } +{ "id": 34, "nested": { "randomK": "randomV", "List": null, "A": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "c": { "e": 1, "d": null }, "f": [ 2.0, null, 3.0 ], "arr": [ [ ] ], "name": null, "a": null } +{ "id": 37, "name": "Kohli", "nested": { "List": [ 1, 2, 3 ], "A": "a", "randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 3.5999999046325684, 4.0 ], "arr": [ [ 1, 2, 3 ] ] } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm index 628b82f725..624049e66b 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm @@ -1,5 +1,5 @@ -{ "arr": [ [ 1, 2 ] ], "a": { }, "id": 8, "nested": { "List": [ 100, 300 ] }, "obj_array": [ { "first": "first" }, { }, { "first": "second" } ] } -{ "arr": [ [ 1, 2 ], [ ] ], "a": { }, "c": { }, "f": [ ], "name": "Virat", "id": 10, "nested": { "List": [ ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } -{ "arr": [ [ 1, 2 ], [ ] ], "a": { "b": 1 }, "c": { "d": 1 }, "f": [ 1.0 ], "name": "Virat", "id": 28, "nested": { "A": "a", "List": [ ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } -{ "arr": [ [ ] ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "id": 34, "nested": { "randomK": "randomV" }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } -{ "arr": [ [ 1, 2, 3 ] ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 3.6, 4.0 ], "name": "Kohli", "id": 37, "nested": { "A": "a", "List": [ 1, 2, 3 ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } +{ "arr": [ [ 1, null, 2 ], null ], "a": { "b": null }, "id": 8, "nested": { "List": [ 100, null, 300 ], "A": null, "randomK": null }, "obj_array": [ { "first": "first" }, { "first": null }, { "first": "second" } ], "c": null, "f": null, "name": null } +{ "arr": [ [ 1, 2 ], [ ] ], "a": { "b": null }, "c": { "d": null, "e": null }, "f": [ ], "name": "Virat", "id": 10, "nested": { "List": [ ], "A": null, "randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } +{ "arr": [ [ 1, 2 ], [ null ] ], "a": { "b": 1 }, "c": { "d": 1, "e": null }, "f": [ 1.0, null ], "name": "Virat", "id": 28, "nested": { "A": "a", "List": [ null ], "randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } +{ "arr": [ [ ] ], "c": { "e": 1, "d": null }, "f": [ 2.0, null, 3.0 ], "id": 34, "nested": { "randomK": "randomV", "A": null, "List": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": null, "name": null } +{ "arr": [ [ 1, 2, 3 ] ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 3.6, 4.0 ], "name": "Kohli", "id": 37, "nested": { "A": "a", "List": [ 1, 2, 3 ], "randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm index 29ca9ecd0d..5122755b39 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm @@ -1,3 +1,3 @@ { "c": { "col2": { "centuries": [ ], "name": "aqay awil", "id": 1 } } } { "c": { "col2": { "centuries": [ ], "id": 2 } } } -{ "c": { "col2": { "centuries": [ ], "id": 3 } } } +{ "c": { "col2": { "centuries": [ ], "id": 3, "name": null } } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null3/parquet-null3.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null3/parquet-null3.04.adm new file mode 100644 index 0000000000..6376754a10 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null3/parquet-null3.04.adm @@ -0,0 +1,50 @@ +{ "id": 1, "name": "Alice Johnson", "age": 28, "address": { "street": "123 Main St", "city": "New York", "coordinates": { "lat": 40.7128, "lon": -74.006 } }, "tags": [ "premium", "verified", "active" ], "orders": [ { "orderId": 1001, "total": 299.99, "items": [ { "productId": 501, "quantity": 2, "metadata": { "category": "electronics", "attributes": { "color": "black", "size": "medium" } } } ] } ], "preferences": { "notifications": { "email": true, "sms": false }, "theme": "dark" }, "sco [...] +{ "id": 2, "name": "Bob Smith", "address": { "city": "Los Angeles", "coordinates": { "lat": 34.0522, "lon": null }, "street": null }, "tags": [ "active" ], "orders": [ ], "preferences": { "notifications": { "sms": true, "email": null }, "theme": null }, "scores": [ 88.0, null, 95.5 ], "age": null } +{ "id": 3, "name": "Carol Davis", "age": 35, "tags": [ ], "orders": [ { "orderId": 1002, "total": 15.99, "items": [ { "quantity": 1, "metadata": { "category": "books", "attributes": null }, "productId": null } ] } ], "preferences": { "theme": "light", "notifications": null }, "scores": [ ], "address": null } +{ "id": 4, "name": "David Wilson", "age": 42, "address": { "street": "456 Oak Ave", "city": "Chicago", "coordinates": null }, "orders": [ { "orderId": 1003, "total": 0.0, "items": [ ] } ], "scores": [ 70.5, 75.0, 80.5, 85.0 ], "tags": null, "preferences": null } +{ "id": 5, "name": "Emma Brown", "age": 31, "address": { "street": "789 Pine Rd", "city": "Houston", "coordinates": { "lat": null, "lon": null } }, "tags": [ "verified", null, "new" ], "orders": [ { "orderId": 1004, "items": [ { "productId": 502, "quantity": null, "metadata": null } ], "total": null } ], "preferences": { "notifications": { "email": false, "sms": false }, "theme": "auto" }, "scores": null } +{ "id": 6, "name": "Frank Miller", "age": 29, "address": { "street": "321 Elm St", "city": "Phoenix", "coordinates": { "lat": 33.4484, "lon": -112.074 } }, "tags": [ "premium" ], "orders": [ { "orderId": 1005, "total": 450.0, "items": [ { "productId": 503, "quantity": 3, "metadata": { "attributes": { "color": "blue", "size": null }, "category": null } }, null ] } ], "preferences": { "notifications": { "email": true, "sms": true }, "theme": "dark" }, "scores": [ 90.0, 85.5 ] } +{ "id": 7, "name": "Grace Lee", "address": { "street": "654 Maple Dr", "city": "Philadelphia", "coordinates": { "lat": 39.9526, "lon": -75.1652 } }, "tags": [ ], "preferences": { "notifications": { "email": null, "sms": null }, "theme": "light" }, "scores": [ null, null, null ], "age": null, "orders": null } +{ "id": 8, "name": "Henry Garcia", "age": 38, "address": { "city": "San Antonio", "street": null, "coordinates": null }, "tags": [ "active", "verified" ], "orders": [ { "orderId": 1006, "total": 125.5, "items": [ { "productId": 504, "quantity": 1, "metadata": { "category": "clothing", "attributes": { "size": "large", "color": null } } }, { "productId": 505, "quantity": 2, "metadata": { "category": "accessories", "attributes": { "color": "red", "size": "small" } } } ] }, null ], "preferen [...] +{ "id": 9, "name": "Ivy Martinez", "age": 26, "address": { "street": "987 Cedar Ln", "city": "San Diego", "coordinates": { "lat": 32.7157, "lon": -117.1611 } }, "tags": [ "new" ], "orders": [ { "orderId": 1007, "total": 0.0, "items": [ { "metadata": { "category": null, "attributes": null }, "productId": null, "quantity": null } ] } ], "scores": [ ], "preferences": null } +{ "id": 10, "name": "Jack Robinson", "age": 45, "tags": [ "premium", "verified", "active", "vip" ], "orders": [ { "orderId": 1008, "total": 999.99, "items": [ { "productId": 506, "quantity": 5, "metadata": { "category": "electronics", "attributes": { "color": "silver", "size": "extra-large" } } } ] }, { "orderId": 1009, "items": [ ], "total": null } ], "preferences": { "notifications": { "email": true, "sms": true }, "theme": "dark" }, "scores": [ 95.0, 98.5, 92.0, 89.5 ], "address": null } +{ "id": 11, "name": "Karen White", "address": { "street": "147 Birch St", "city": "Dallas", "coordinates": { "lon": -96.797, "lat": null } }, "orders": [ ], "preferences": { "theme": "auto", "notifications": null }, "scores": [ null ], "age": null, "tags": null } +{ "id": 12, "name": "Leo Harris", "age": 33, "address": { "street": "258 Spruce Ave", "city": "San Jose", "coordinates": { "lat": 37.3382, "lon": null } }, "tags": [ "active" ], "orders": [ { "orderId": 1010, "total": 50.0, "items": null } ], "preferences": { "notifications": { "email": false, "sms": null }, "theme": "light" }, "scores": [ 82.5, 87.0 ] } +{ "id": 13, "name": "Mia Clark", "age": 27, "address": { "street": "369 Willow Way", "city": "Austin", "coordinates": { "lat": 30.2672, "lon": -97.7431 } }, "tags": [ "verified", "new" ], "orders": [ { "orderId": 1011, "total": 175.25, "items": [ { "productId": 507, "quantity": 1, "metadata": { "category": "home", "attributes": { "color": "white", "size": null } } }, { "quantity": 2, "productId": null, "metadata": null } ] } ], "preferences": { "notifications": { "email": true, "sms": fa [...] +{ "id": 14, "name": "Noah Lewis", "age": 40, "tags": [ ], "scores": [ ], "address": null, "orders": null, "preferences": null } +{ "id": 15, "name": "Olivia Walker", "address": { "city": "Jacksonville", "street": null, "coordinates": null }, "tags": [ "premium" ], "orders": [ { "orderId": 1012, "total": 320.0, "items": [ { "productId": 508, "quantity": 4, "metadata": { "category": "sports", "attributes": { "color": "green", "size": "medium" } } } ] } ], "preferences": { "notifications": { "sms": true, "email": null }, "theme": null }, "scores": [ 88.0, 91.5, null, 86.0 ], "age": null } +{ "id": 16, "name": "Peter Hall", "age": 36, "address": { "street": "741 Ash Blvd", "city": "Fort Worth", "coordinates": { "lat": 32.7555, "lon": -97.3308 } }, "tags": [ "active", null ], "orders": [ ], "preferences": { "notifications": { "email": false, "sms": false }, "theme": "auto" }, "scores": [ null, 79.5, 84.0 ] } +{ "id": 17, "name": "Quinn Allen", "age": 32, "address": { "street": "852 Poplar Ct", "city": "Columbus", "coordinates": { "lat": 39.9612, "lon": null } }, "orders": [ { "orderId": 1013, "items": [ { "productId": 509, "metadata": { "attributes": { "color": null, "size": null }, "category": null }, "quantity": null } ], "total": null } ], "preferences": { "theme": "light", "notifications": null }, "scores": [ ], "tags": null } +{ "id": 18, "name": "Rachel Young", "age": 29, "address": { "street": "963 Fir Rd", "city": "Charlotte", "coordinates": { "lat": null, "lon": null } }, "tags": [ "verified" ], "orders": [ { "orderId": 1014, "total": 215.75, "items": [ { "productId": 510, "quantity": 3, "metadata": { "category": "electronics", "attributes": null } }, null, { "productId": 511, "quantity": 1, "metadata": { "category": "books", "attributes": { "size": "standard", "color": null } } } ] } ], "preferences": { " [...] +{ "id": 19, "name": "Samuel King", "tags": [ ], "orders": [ { "orderId": 1015, "total": 0.0, "items": [ ] } ], "age": null, "address": null, "preferences": null, "scores": null } +{ "id": 20, "name": "Tina Wright", "age": 34, "address": { "street": "159 Hickory Pl", "city": "Indianapolis", "coordinates": { "lat": 39.7684, "lon": -86.1581 } }, "tags": [ "premium", "active" ], "orders": [ { "orderId": 1016, "total": 89.99, "items": [ { "productId": 512, "quantity": 2, "metadata": { "category": "clothing", "attributes": { "color": "yellow", "size": "small" } } } ] }, { "orderId": 1017, "items": [ { "productId": null, "quantity": null, "metadata": null } ], "total": n [...] +{ "id": 21, "name": "Uma Scott", "age": 41, "address": { "city": "Seattle", "coordinates": { "lat": 47.6062, "lon": null }, "street": null }, "tags": [ "new", "verified" ], "preferences": { "notifications": { "email": null, "sms": null }, "theme": null }, "scores": [ 90.5 ], "orders": null } +{ "id": 22, "name": "Victor Green", "age": 37, "address": { "street": "753 Walnut St", "city": "Denver", "coordinates": null }, "orders": [ { "orderId": 1018, "total": 425.5, "items": [ { "productId": 513, "quantity": 5, "metadata": { "category": "toys", "attributes": { "color": "multi", "size": null } } }, { "productId": 514, "quantity": 1, "metadata": { "category": null, "attributes": null } } ] } ], "preferences": { "notifications": { "email": true, "sms": false }, "theme": "auto" }, [...] +{ "id": 23, "name": "Wendy Baker", "address": { "street": "864 Cherry Ave", "city": "Boston", "coordinates": { "lat": 42.3601, "lon": -71.0589 } }, "tags": [ "active" ], "orders": [ ], "scores": [ null, null ], "age": null, "preferences": null } +{ "id": 24, "name": "Xavier Adams", "age": 30, "address": { "street": "975 Beech Dr", "city": "Nashville", "coordinates": { "lon": -86.7816, "lat": null } }, "tags": [ "premium", null, "verified" ], "orders": [ { "orderId": 1019, "total": 150.0, "items": null } ], "preferences": { "notifications": { "email": false, "sms": null }, "theme": "dark" }, "scores": [ 85.0, 88.5, 92.0 ] } +{ "id": 25, "name": "Yara Nelson", "age": 28, "tags": [ ], "orders": [ { "orderId": 1020, "total": 25.99, "items": [ { "productId": 515, "quantity": 1, "metadata": { "category": "food", "attributes": { "color": null, "size": null } } } ] } ], "preferences": { "notifications": { "email": true, "sms": true }, "theme": "light" }, "address": null, "scores": null } +{ "id": 26, "name": "Zack Carter", "age": 43, "address": { "street": "147 Redwood Ln", "city": "Detroit", "coordinates": { "lat": 42.3314, "lon": -83.0458 } }, "tags": [ "verified", "vip" ], "orders": [ { "orderId": 1021, "total": 550.0, "items": [ { "productId": 516, "quantity": 2, "metadata": { "category": "electronics", "attributes": { "color": "black", "size": "large" } } }, { "productId": 517, "metadata": { "category": "accessories", "attributes": null }, "quantity": null } ] }, nul [...] +{ "id": 27, "name": "Amy Mitchell", "address": { "city": "Memphis", "street": null, "coordinates": null }, "orders": [ ], "preferences": { "theme": "auto", "notifications": null }, "scores": [ ], "age": null, "tags": null } +{ "id": 28, "name": "Ben Perez", "age": 35, "address": { "street": "258 Magnolia Rd", "city": "Portland", "coordinates": { "lat": 45.5152, "lon": null } }, "tags": [ "active", "new" ], "orders": [ { "orderId": 1022, "items": [ { "quantity": 3, "productId": null, "metadata": null } ], "total": null } ], "preferences": { "notifications": { "email": true, "sms": false }, "theme": "dark" }, "scores": [ 80.0, 83.5 ] } +{ "id": 29, "name": "Clara Roberts", "age": 39, "address": { "street": "369 Sycamore Way", "city": "Las Vegas", "coordinates": { "lat": 36.1699, "lon": -115.1398 } }, "tags": [ "premium" ], "orders": [ { "orderId": 1023, "total": 850.0, "items": [ { "productId": 518, "quantity": 1, "metadata": { "category": "jewelry", "attributes": { "color": "gold", "size": "one-size" } } }, null, { "productId": 519, "quantity": 2, "metadata": { "category": "jewelry", "attributes": { "color": "silver", [...] +{ "id": 30, "name": "Derek Turner", "age": 31, "tags": [ ], "preferences": { "notifications": { "email": false, "sms": false }, "theme": "light" }, "scores": [ 78.5, 82.0, 85.5, 87.0 ], "address": null, "orders": null } +{ "id": 31, "name": "Eva Phillips", "address": { "street": "741 Cottonwood Dr", "city": "Baltimore", "coordinates": { "lat": null, "lon": null } }, "tags": [ "verified" ], "orders": [ { "orderId": 1024, "total": 0.0, "items": [ ] } ], "preferences": { "notifications": { "sms": true, "email": null }, "theme": null }, "age": null, "scores": null } +{ "id": 32, "name": "Fred Campbell", "age": 44, "address": { "street": "852 Dogwood Ave", "city": "Milwaukee", "coordinates": { "lat": 43.0389, "lon": -87.9065 } }, "orders": [ { "orderId": 1025, "total": 199.99, "items": [ { "productId": 520, "quantity": 4, "metadata": { "category": "garden", "attributes": null } } ] }, { "orderId": 1026, "total": 75.5, "items": null } ], "preferences": { "notifications": { "email": true, "sms": true }, "theme": "auto" }, "scores": [ 89.0 ], "tags": null } +{ "id": 33, "name": "Gina Parker", "age": 26, "address": { "city": "Albuquerque", "coordinates": { "lat": 35.0844, "lon": null }, "street": null }, "tags": [ "new", null, "active" ], "orders": [ ], "scores": [ ], "preferences": null } +{ "id": 34, "name": "Hank Evans", "age": 38, "address": { "street": "963 Laurel Ct", "city": "Tucson", "coordinates": null }, "tags": [ "premium", "verified" ], "orders": [ { "orderId": 1027, "total": 375.0, "items": [ { "productId": 521, "quantity": 1, "metadata": { "category": "automotive", "attributes": { "color": "gray", "size": "universal" } } }, { "metadata": { "attributes": { "color": null, "size": null }, "category": null }, "productId": null, "quantity": null } ] } ], "preferenc [...] +{ "id": 35, "name": "Iris Edwards", "tags": [ ], "orders": [ { "orderId": 1028, "total": 55.0, "items": [ { "productId": 522, "quantity": 2, "metadata": null } ] } ], "preferences": { "notifications": { "email": true, "sms": false }, "theme": "light" }, "scores": [ null ], "age": null, "address": null } +{ "id": 36, "name": "Jake Collins", "age": 33, "address": { "street": "159 Juniper Ln", "city": "Fresno", "coordinates": { "lat": 36.7378, "lon": -119.7871 } }, "tags": [ "active" ], "preferences": { "notifications": null, "theme": null }, "scores": [ 92.5, 88.0, 85.5 ], "orders": null } +{ "id": 37, "name": "Kara Stewart", "age": 29, "address": { "street": "753 Cypress Rd", "city": "Sacramento", "coordinates": { "lon": -121.4944, "lat": null } }, "orders": [ { "orderId": 1029, "total": 125.0, "items": [ { "productId": 523, "quantity": 3, "metadata": { "category": "office", "attributes": { "color": "blue", "size": null } } }, null ] } ], "preferences": { "notifications": { "email": null, "sms": null }, "theme": "auto" }, "scores": [ ], "tags": null } +{ "id": 38, "name": "Liam Sanchez", "age": 42, "address": { "street": "864 Alder Blvd", "city": "Long Beach", "coordinates": { "lat": 33.7701, "lon": null } }, "tags": [ "verified", "premium" ], "orders": [ { "orderId": 1030, "items": [ ], "total": null } ], "preferences": null, "scores": null } +{ "id": 39, "name": "Maya Morris", "address": { "city": "Kansas City", "street": null, "coordinates": null }, "tags": [ ], "orders": [ { "orderId": 1031, "total": 45.99, "items": [ { "productId": 524, "metadata": { "category": "pet", "attributes": null }, "quantity": null } ] } ], "preferences": { "notifications": { "email": false, "sms": true }, "theme": "dark" }, "scores": [ null, null, 87.5 ], "age": null } +{ "id": 40, "name": "Nate Rogers", "age": 36, "address": { "street": "975 Hawthorn Pl", "city": "Mesa", "coordinates": { "lat": 33.4152, "lon": -111.8315 } }, "tags": [ "new" ], "orders": [ { "orderId": 1032, "total": 220.0, "items": [ { "productId": 525, "quantity": 1, "metadata": { "category": "music", "attributes": { "size": "standard", "color": null } } }, { "productId": 526, "quantity": 2, "metadata": null } ] }, null ], "preferences": { "notifications": { "email": true, "sms": null [...] +{ "id": 41, "name": "Olive Reed", "age": 27, "orders": [ ], "preferences": { "notifications": { "sms": false, "email": null }, "theme": null }, "scores": [ ], "address": null, "tags": null } +{ "id": 42, "name": "Paul Cook", "address": { "street": "147 Hemlock St", "city": "Atlanta", "coordinates": { "lat": 33.749, "lon": -84.388 } }, "tags": [ "active", "verified" ], "orders": [ { "orderId": 1033, "items": [ { "quantity": 5, "metadata": { "attributes": { "color": "orange", "size": null }, "category": null }, "productId": null } ], "total": null } ], "preferences": { "notifications": { "email": true, "sms": true }, "theme": "auto" }, "scores": [ 91.0, null, 94.5 ], "age": null } +{ "id": 43, "name": "Quincy Bell", "age": 34, "address": { "street": "258 Sequoia Ave", "city": "Colorado Springs", "coordinates": { "lat": null, "lon": null } }, "tags": [ "premium" ], "scores": [ 75.5, 80.0, 84.5 ], "orders": null, "preferences": null } +{ "id": 44, "name": "Rita Morgan", "age": 40, "address": { "city": "Raleigh", "coordinates": { "lat": 35.7796, "lon": null }, "street": null }, "tags": [ ], "orders": [ { "orderId": 1034, "total": 135.5, "items": [ { "productId": 527, "quantity": 1, "metadata": { "category": "beauty", "attributes": { "color": "pink", "size": "travel" } } }, null, { "productId": 528, "metadata": { "category": "beauty", "attributes": null }, "quantity": null } ] } ], "preferences": { "notifications": { "e [...] +{ "id": 45, "name": "Steve Bailey", "address": { "street": "369 Buckeye Dr", "city": "Omaha", "coordinates": null }, "tags": [ "verified", null ], "orders": [ { "orderId": 1035, "total": 99.99, "items": null } ], "preferences": { "theme": "light", "notifications": null }, "scores": [ ], "age": null } +{ "id": 46, "name": "Tara Rivera", "age": 32, "address": { "street": "741 Chestnut Way", "city": "Miami", "coordinates": { "lat": 25.7617, "lon": -80.1918 } }, "tags": [ "new", "active" ], "orders": [ { "orderId": 1036, "total": 180.0, "items": [ { "productId": 529, "quantity": 2, "metadata": { "category": "fitness", "attributes": { "size": "medium", "color": null } } } ] }, { "orderId": 1037, "total": 0.0, "items": [ ] } ], "preferences": { "notifications": { "email": true, "sms": fals [...] +{ "id": 47, "name": "Ursula Cooper", "age": 37, "address": null, "tags": null, "orders": null, "preferences": null, "scores": null } +{ "id": 48, "name": "Vince Richardson", "age": 30, "address": { "street": "852 Palmetto Ln", "city": "Oakland", "coordinates": { "lat": 37.8044, "lon": null } }, "tags": [ "premium", "verified" ], "orders": [ { "orderId": 1038, "total": 675.0, "items": [ { "productId": 530, "quantity": 3, "metadata": { "category": "tech", "attributes": { "color": "white", "size": null } } }, { "quantity": 1, "productId": null, "metadata": null } ] } ], "preferences": { "notifications": { "sms": true, "em [...] +{ "id": 49, "name": "Wanda Cox", "address": { "city": "Minneapolis", "coordinates": { "lon": -93.265, "lat": null }, "street": null }, "tags": [ ], "orders": [ { "orderId": 1039, "items": [ { "productId": 531, "metadata": { "category": "craft", "attributes": { "color": "multi", "size": "kit" } }, "quantity": null } ], "total": null } ], "preferences": { "notifications": { "email": false, "sms": null }, "theme": "dark" }, "scores": [ null, 86.0 ], "age": null } +{ "id": 50, "name": "Xander Howard", "age": 35, "address": { "street": "963 Cedarwood Ct", "city": "Tulsa", "coordinates": { "lat": 36.154, "lon": -95.9928 } }, "tags": [ "active", "vip", "premium" ], "orders": [ { "orderId": 1040, "total": 1250.0, "items": [ { "productId": 532, "quantity": 10, "metadata": { "category": "wholesale", "attributes": { "color": "assorted", "size": "bulk" } } }, { "productId": 533, "quantity": 5, "metadata": { "category": "wholesale", "attributes": { "size": [...] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm index 4fd973e9bc..677a93a757 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm @@ -1,4 +1,4 @@ -{ "ratings": [ ], "id": 2 } +{ "ratings": [ null ], "id": 2, "rating": null } { "ratings": [ ], "rating": 1.0, "id": 5 } { "ratings": [ 1 ], "rating": 2.0, "id": 8 } { "ratings": [ 1, 2, 3 ], "rating": 3.0, "id": 10 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/dynamic-prefixes/parquet/embed-one-value/one-field.110.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/dynamic-prefixes/parquet/embed-one-value/one-field.110.adm index de64450eab..410aed4abb 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/dynamic-prefixes/parquet/embed-one-value/one-field.110.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/dynamic-prefixes/parquet/embed-one-value/one-field.110.adm @@ -3,7 +3,7 @@ { "id": 3, "department": "accounting", "name": { "first": "Alex", "last": "miller" } } { "id": 4, "department": "engineering", "name": { "first": "Tom", "last": "smith" } } { "id": 5, "department": "engineering", "name": { "first": "Alice", "last": "Jones" } } -{ "id": 6, "department": "engineering", "name": { "last": "miller" } } +{ "id": 6, "department": "engineering", "name": { "first": null, "last": "miller" } } { "id": 7, "department": "hr", "name": { "first": "James", "last": "smith" } } { "id": 8, "department": "hr", "name": { "last": "Jones" } } { "id": 9, "department": "hr", "name": { "last": "miller" } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.02.adm index 18f3275beb..ff1820c120 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.02.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.02.adm @@ -1,2 +1,2 @@ { "display_url": "string" } -{ } +{ "display_url": null } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.04.adm index 18f3275beb..ff1820c120 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.04.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.04.adm @@ -1,2 +1,2 @@ { "display_url": "string" } -{ } +{ "display_url": null } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.06.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.06.adm index 695240b26c..113982075f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.06.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.06.adm @@ -1 +1,2 @@ { "display_url": [ "string" ] } +{ "display_url": null } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.08.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.08.adm index 695240b26c..113982075f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.08.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/array-access-pushdown/array-access-pushdown.08.adm @@ -1 +1,2 @@ { "display_url": [ "string" ] } +{ "display_url": null } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.3.adm index 8876910a54..be78ff92e5 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.3.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.3.adm @@ -1,2 +1,2 @@ -{ "f1": true, "f2": true } -{ "f1": true, "f2": true } +{ "f1": true, "f2": false } +{ "f1": true, "f2": false } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/null-in-array/null-in-array.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/null-in-array/null-in-array.02.adm new file mode 100644 index 0000000000..058317f3f2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/null-in-array/null-in-array.02.adm @@ -0,0 +1,10 @@ +{ "id": 1, "numbers": [ 10, null, 30 ] } +{ "id": 2, "items": [ { "value": 10 }, { "value": null }, { "value": 30 } ] } +{ "id": 3, "contacts": [ { "name": "Alice", "age": 30 }, { "name": "Bob", "age": 25 } ] } +{ "id": 4, "contacts": [ { "name": "Charlie", "age": 35 }, null, { "name": "David", "age": 28 } ] } +{ "id": 5, "contacts": [ { "name": "Eve", "age": null }, { "age": 40, "name": null } ] } +{ "id": 6, "contacts": null } +{ "id": 7, "contacts": [ ] } +{ "id": 8, "contacts": [ { "name": "Frank", "age": 45 }, { "name": null, "age": null }, { "name": "Grace", "age": 50 } ] } +{ "id": 9, "numbers": [ 5, 15, 25 ] } +{ "id": 10, "contacts": [ { "name": "Alice", "age": 30 }, { "name": "Bob", "age": null }, { "name": null, "age": null }, { "age": 25, "name": null } ] } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/parquet-types/unset-flags/unset-flags.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/parquet-types/unset-flags/unset-flags.02.adm index c42147bec6..82ed76a6b0 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/parquet-types/unset-flags/unset-flags.02.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/parquet-types/unset-flags/unset-flags.02.adm @@ -1 +1 @@ -{ "boolean_field": true, "int8_field": 8, "int16_field": 16, "int32_field": 32, "int64_field": 64, "uint8_field": 128, "uint16_field": 32768, "uint32_field": 2147483648, "uint64_field": 151, "float_field": 1.0, "double_field": 1.0, "binary_field": hex("000102"), "string_field": "stringVal", "enum_field": "enumVal", "json_field": [ 1, 2, 3 ], "date_field": date("2022-01-01"), "time32_millis_field": time("01:00:00.000"), "time64_micros_field": time("01:00:00.000"), "time64_nanos_field": ti [...] +{ "boolean_field": true, "int8_field": 8, "int16_field": 16, "int32_field": 32, "int64_field": 64, "uint8_field": 128, "uint16_field": 32768, "uint32_field": 2147483648, "uint64_field": 151, "overflowed_uint64_field": null, "float_field": 1.0, "double_field": 1.0, "binary_field": hex("000102"), "string_field": "stringVal", "enum_field": "enumVal", "json_field": [ 1, 2, 3 ], "date_field": date("2022-01-01"), "time32_millis_field": time("01:00:00.000"), "time64_micros_field": time("01:00:0 [...] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.2.adm index 53f2518eb9..211cf89e58 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.2.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.2.adm @@ -1,2 +1,2 @@ { "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "entities": { "urls": [ { "display_url": "string", "expanded_url": "string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": "string", "in_reply_to_scr [...] -{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": [...] +{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": [...] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.3.adm index 53f2518eb9..211cf89e58 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.3.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.3.adm @@ -1,2 +1,2 @@ { "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "entities": { "urls": [ { "display_url": "string", "expanded_url": "string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": "string", "in_reply_to_scr [...] -{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": [...] +{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": [...] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.02.adm index 0be5d980da..4b095fd0ff 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.02.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.02.adm @@ -1,2 +1,2 @@ -true -true \ No newline at end of file +false +false diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.04.adm index 0be5d980da..4b095fd0ff 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.04.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/type-mismatch/type-mismatch.04.adm @@ -1,2 +1,2 @@ -true -true \ No newline at end of file +false +false diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 81ec204e84..141e1c65ab 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -99,6 +99,16 @@ <output-dir compare="Text">parquet-null2</output-dir> </compilation-unit> </test-case> + <test-case FilePath="copy-to"> + <compilation-unit name="parquet-null3"> + <placeholder name="adapter" value="S3" /> + <placeholder name="pathprefix" value="" /> + <placeholder name="path_prefix" value="" /> + <placeholder name="additionalProperties" value='"container":"playground",' /> + <placeholder name="additional_Properties" value='("container"="playground")' /> + <output-dir compare="Text">parquet-null3</output-dir> + </compilation-unit> + </test-case> <test-case FilePath="copy-to"> <compilation-unit name="parquet-tweet"> <placeholder name="adapter" value="S3" /> @@ -524,6 +534,12 @@ <output-dir compare="Clean-JSON">common/parquet/ASTERIXDB-3540</output-dir> </compilation-unit> </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="common/parquet/null-in-array"> + <placeholder name="adapter" value="S3" /> + <output-dir compare="Clean-JSON">common/parquet/null-in-array</output-dir> + </compilation-unit> + </test-case> <test-case FilePath="external-dataset"> <compilation-unit name="common/parquet/array-access-pushdown"> <placeholder name="adapter" value="S3" /> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/ParquetConverterContext.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/ParquetConverterContext.java index db63adc853..7e9b3a2052 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/ParquetConverterContext.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/ParquetConverterContext.java @@ -42,6 +42,7 @@ import org.apache.asterix.om.base.AMutableDateTime; import org.apache.asterix.om.base.AMutableDouble; import org.apache.asterix.om.base.AMutableInt64; import org.apache.asterix.om.base.AMutableTime; +import org.apache.asterix.om.base.ANull; import org.apache.asterix.om.base.ATime; import org.apache.asterix.om.types.ATypeTag; import org.apache.asterix.om.types.BuiltinType; @@ -82,6 +83,9 @@ public class ParquetConverterContext extends ParserContext { @SuppressWarnings("unchecked") private final ISerializerDeserializer<ADateTime> datetimeSerDer = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ADATETIME); + @SuppressWarnings("unchecked") + private final ISerializerDeserializer<ANull> nullSerDer = + SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ANULL); //Issued warnings private final List<Warning> warnings; @@ -293,6 +297,14 @@ public class ParquetConverterContext extends ParserContext { } } + public void serializeNull(DataOutput output) { + try { + nullSerDer.serialize(ANull.NULL, output); + } catch (HyracksDataException e) { + throw new IllegalStateException(e); + } + } + private void writeLength(int length, DataOutput out) throws IOException { int requiredLength = VarLenIntEncoderDecoder.getBytesRequired(length); if (lengthBytes == null || requiredLength > lengthBytes.length) { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java index 6b63a7b668..aab0b470f4 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.external.input.record.reader.hdfs.parquet.converter.nested; +import static org.apache.asterix.runtime.evaluators.functions.PointableHelper.NULL_REF; + import java.io.IOException; import org.apache.asterix.builders.IARecordBuilder; @@ -41,15 +43,21 @@ public class ObjectConverter extends AbstractComplexConverter { * {@link IExternalFilterValueEmbedder} decides whether the object should be ignored entirely */ private boolean ignore = false; + private final GroupType parquetType; + private final boolean[] isValueAdded; public ObjectConverter(AbstractComplexConverter parent, int index, GroupType parquetType, ParquetConverterContext context) throws IOException { super(parent, index, parquetType, context); + this.parquetType = parquetType; + isValueAdded = new boolean[parquetType.getFieldCount()]; } public ObjectConverter(AbstractComplexConverter parent, String stringFieldName, int index, GroupType parquetType, ParquetConverterContext context) throws IOException { super(parent, stringFieldName, index, parquetType, context); + this.parquetType = parquetType; + isValueAdded = new boolean[parquetType.getFieldCount()]; } @Override @@ -63,12 +71,33 @@ public class ObjectConverter extends AbstractComplexConverter { } else { ignore = checkValueEmbedder(valueEmbedder); } + for (int i = 0; i < parquetType.getFieldCount(); i++) { + isValueAdded[i] = false; + } + } @Override public void end() { closeDirectRepeatedChildren(); if (!ignore) { + IExternalFilterValueEmbedder valueEmbedder = context.getValueEmbedder(); + for (int i = 0; i < parquetType.getFieldCount(); i++) { + if (!isValueAdded[i]) { + String childFieldName = parquetType.getFieldName(i); + try { + if (valueEmbedder.shouldEmbed(childFieldName, ATypeTag.NULL)) { + builder.addField(context.getSerializedFieldName(childFieldName), + valueEmbedder.getEmbeddedValue()); + } else { + builder.addField(context.getSerializedFieldName(childFieldName), NULL_REF); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + writeToParent(); context.getValueEmbedder().exitObject(); } @@ -92,6 +121,9 @@ public class ObjectConverter extends AbstractComplexConverter { } IExternalFilterValueEmbedder valueEmbedder = context.getValueEmbedder(); IValueReference fieldName = value.getFieldName(); + String fieldNameStr = value.getStringFieldName(); + int fieldIndex = parquetType.getFieldIndex(fieldNameStr); + isValueAdded[fieldIndex] = true; try { if (valueEmbedder.shouldEmbed(value.getStringFieldName(), value.getTypeTag())) { builder.addField(fieldName, valueEmbedder.getEmbeddedValue()); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectRepeatedConverter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectRepeatedConverter.java index ef944ed9c5..9f79b36b7d 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectRepeatedConverter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectRepeatedConverter.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.external.input.record.reader.hdfs.parquet.converter.nested; +import static org.apache.asterix.runtime.evaluators.functions.PointableHelper.NULL_REF; + import java.io.DataOutput; import java.io.IOException; @@ -56,9 +58,9 @@ import org.apache.parquet.schema.PrimitiveType; * * Instead of: * message arrow_schema { - * required group myGroupArray (LIST) { + * required group my_group_list (LIST) { * repeated group list { - * optional group { + * optional group element { * optional binary hello (STRING); * optional binary foo (STRING); * } @@ -77,10 +79,14 @@ public class ObjectRepeatedConverter extends AbstractComplexConverter { * {@link IExternalFilterValueEmbedder} decides whether the object should be ignored entirely */ private boolean ignore = false; + private final GroupType parquetType; + private final boolean[] isValueAdded; public ObjectRepeatedConverter(AbstractComplexConverter parent, String stringFieldName, int index, GroupType parquetType, ParquetConverterContext context) throws IOException { super(parent, stringFieldName, index, parquetType, context); + this.parquetType = parquetType; + isValueAdded = new boolean[parquetType.getFieldCount()]; } @Override @@ -94,12 +100,31 @@ public class ObjectRepeatedConverter extends AbstractComplexConverter { } else { ignore = checkValueEmbedder(valueEmbedder); } + for (int i = 0; i < parquetType.getFieldCount(); i++) { + isValueAdded[i] = false; + } } @Override public void end() { closeDirectRepeatedChildren(); if (!ignore) { + IExternalFilterValueEmbedder valueEmbedder = context.getValueEmbedder(); + for (int i = 0; i < parquetType.getFieldCount(); i++) { + if (!isValueAdded[i]) { + String childFieldName = parquetType.getFieldName(i); + try { + if (valueEmbedder.shouldEmbed(childFieldName, ATypeTag.NULL)) { + recordBuilder.addField(context.getSerializedFieldName(childFieldName), + valueEmbedder.getEmbeddedValue()); + } else { + recordBuilder.addField(context.getSerializedFieldName(childFieldName), NULL_REF); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } writeToList(); context.getValueEmbedder().exitObject(); } @@ -167,6 +192,9 @@ public class ObjectRepeatedConverter extends AbstractComplexConverter { } IExternalFilterValueEmbedder valueEmbedder = context.getValueEmbedder(); IValueReference fieldName = value.getFieldName(); + String fieldNameStr = value.getStringFieldName(); + int fieldIndex = parquetType.getFieldIndex(fieldNameStr); + isValueAdded[fieldIndex] = true; try { if (valueEmbedder.shouldEmbed(value.getStringFieldName(), value.getTypeTag())) { recordBuilder.addField(fieldName, valueEmbedder.getEmbeddedValue()); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/RepeatedConverter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/RepeatedConverter.java index 87d0ae49e7..812e358e29 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/RepeatedConverter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/RepeatedConverter.java @@ -31,6 +31,8 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.PrimitiveType; public class RepeatedConverter extends AbstractComplexConverter { + private boolean valueAdded; + public RepeatedConverter(AbstractComplexConverter parent, int index, GroupType parquetType, ParquetConverterContext context) throws IOException { super(parent, index, parquetType, context); @@ -38,12 +40,16 @@ public class RepeatedConverter extends AbstractComplexConverter { @Override public void start() { - //NoOp + valueAdded = false; } @Override public void end() { - //NoOp + if (!valueAdded) { + context.serializeNull(getDataOutput()); + parent.addValue(this); + } + valueAdded = false; } @Override @@ -53,6 +59,7 @@ public class RepeatedConverter extends AbstractComplexConverter { @Override public void addValue(IFieldValue value) { + valueAdded = true; parent.addValue(value); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/primitve/PrimitiveRepeatedConverter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/primitve/PrimitiveRepeatedConverter.java index a832484a2a..45f611ce2a 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/primitve/PrimitiveRepeatedConverter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/primitve/PrimitiveRepeatedConverter.java @@ -48,7 +48,7 @@ import org.apache.parquet.schema.PrimitiveType; * message schema { * required group my_primitive_array (LIST) { * repeated group list { - * optional binary (STRING); + * optional binary element (STRING); * } * } * }
