This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 641424faebcdd39345b496e8bf83d869c40e458f Author: ayush.tripathi <[email protected]> AuthorDate: Mon Dec 2 21:03:02 2024 +0530 [ASTERIXDB-3353][EXT] Fix handling of arrays in Avro union types - user model changes: no - storage format changes: no - interface changes: no Details: Resolves issues in the Avro data parser when processing union types containing arrays and maps. Ext-ref: MB-64467 Change-Id: Icbb42892984a2c30d8c3faa4dfbf6d16f95f61cc Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19167 Tested-by: Jenkins <[email protected]> Reviewed-by: Ayush Tripathi <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- .../data/hdfs/parquet/partition_heterogeneous.json | 100 +++++++++++++++++++++ .../external_dataset/ExternalDatasetTestUtils.java | 2 + .../avro/AvroFileExampleGeneratorUtil.java | 55 ++++++++++-- .../avro-types/avro-union/avro-union.01.ddl.sqlpp | 11 ++- ...nion.01.ddl.sqlpp => avro-union.03.query.sqlpp} | 25 ++---- .../heterogeneous-access.1.ddl.sqlpp | 2 +- .../avro/avro-types/avro-map/avro-map.02.adm | 2 + .../avro/avro-types/avro-map/avro-map.03.adm | 4 +- .../avro-nested-records/avro-nested-records.02.adm | 4 +- .../avro-nested-records/avro-nested-records.03.adm | 4 +- .../avro-primitives/avro-primitives.02.adm | 4 +- .../avro-primitives/avro-primitives.03.adm | 4 +- .../avro/avro-types/avro-union/avro-union.02.adm | 2 + .../avro/avro-types/avro-union/avro-union.03.adm | 100 +++++++++++++++++++++ .../asterix/external/parser/AvroDataParser.java | 5 ++ 15 files changed, 292 insertions(+), 32 deletions(-) diff --git a/asterixdb/asterix-app/data/hdfs/parquet/partition_heterogeneous.json b/asterixdb/asterix-app/data/hdfs/parquet/partition_heterogeneous.json new file mode 100644 index 0000000000..e6179d600c --- /dev/null +++ b/asterixdb/asterix-app/data/hdfs/parquet/partition_heterogeneous.json @@ -0,0 +1,100 @@ +{"id": 1, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 2, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 3, "partitioner_key": "C", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 4, "partitioner_key": "C", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 5, "partitioner_key": "A", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 6, "partitioner_key": "C", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 7, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 8, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 9, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 10, "partitioner_key": "B", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 11, "partitioner_key": "B", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 12, "partitioner_key": "B", "name": "None", "randomField": "5678", "active": false, "price": {"currency": "INR", "value": 99.99}} +{"id": 13, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 14, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 15, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 16, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 17, "partitioner_key": "B", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 18, "partitioner_key": "C", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 19, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 20, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 21, "partitioner_key": "B", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 22, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 23, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 24, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 25, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 26, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 27, "partitioner_key": "A", "name": "None", "randomField": "5678", "active": false, "price": {"currency": "INR", "value": 99.99}} +{"id": 28, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 29, "partitioner_key": "C", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 30, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 31, "partitioner_key": "C", "name": "None", "randomField": "5678", "active": false, "price": {"currency": "INR", "value": 99.99}} +{"id": 32, "partitioner_key": "B", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 33, "partitioner_key": "C", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 34, "partitioner_key": "B", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 35, "partitioner_key": "B", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 36, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 37, "partitioner_key": "B", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 38, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 39, "partitioner_key": "B", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 40, "partitioner_key": "C", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 41, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 42, "partitioner_key": "A", "name": "None", "randomField": "5678", "active": false, "price": {"currency": "INR", "value": 99.99}} +{"id": 43, "partitioner_key": "C", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 44, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 45, "partitioner_key": "A", "name": "Virat Kohli", "randomField": 1234, "active": true, "price": 99.99} +{"id": 46, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 47, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 48, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 49, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 50, "partitioner_key": "C", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 51, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 52, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 53, "partitioner_key": "C", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 54, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 55, "partitioner_key": "A", "name": "None", "randomField": "5678", "active": false, "price": {"currency": "INR", "value": 99.99}} +{"id": 56, "partitioner_key": "B", "name": "Virat Kohli", "randomField": 1234, "active": true, "price": 99.99} +{"id": 57, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 58, "partitioner_key": "B", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 59, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 60, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 61, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 62, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 63, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 64, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 65, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 66, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 67, "partitioner_key": "B", "name": "None", "randomField": "5678", "active": false, "price": {"currency": "INR", "value": 99.99}} +{"id": 68, "partitioner_key": "C", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 69, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 70, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 71, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 72, "partitioner_key": "B", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 73, "partitioner_key": "C", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 74, "partitioner_key": "C", "name": "Virat Kohli", "randomField": 1234, "active": true, "price": 99.99} +{"id": 75, "partitioner_key": "A", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 76, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 77, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], "active": {"status": "active"}, "price": "one hundred"} +{"id": 78, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 79, "partitioner_key": "C", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 80, "partitioner_key": "A", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 81, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 82, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 83, "partitioner_key": "A", "name": "None", "randomField": "5678", "active": false, "price": {"currency": "INR", "value": 99.99}} +{"id": 84, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 85, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 86, "partitioner_key": "C", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 87, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}} +{"id": 88, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 5678, "active": [true, false], "price": 100} +{"id": 89, "partitioner_key": "C", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 90, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 91, "partitioner_key": "A", "name": "Virat Kohli", "randomField": 1234, "active": true, "price": 99.99} +{"id": 92, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 93, "partitioner_key": "B", "name": 3456, "randomField": {"value": 7890}, "active": true, "price": "101.50"} +{"id": 94, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, "randomField": [9, 8, 7], "active": "no", "price": 101.5} +{"id": 95, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": "None", "active": 0, "price": 50} +{"id": 96, "partitioner_key": "B", "name": "Virat Kohli", "randomField": 1234, "active": true, "price": 99.99} +{"id": 97, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 98, "partitioner_key": "B", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} +{"id": 99, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": "ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]} +{"id": 100, "partitioner_key": "C", "name": {"first": "Virat", "last": "Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"} diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java index 07789b764f..c50b391fa2 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java @@ -418,6 +418,8 @@ public class ExternalDatasetTestUtils { loadData(generatedDataBasePath, "", "heterogeneous_1.avro", definition, definitionSegment, false, false); loadData(generatedDataBasePath, "", "heterogeneous_2.avro", definition, definitionSegment, false, false); loadData(generatedDataBasePath, "", "avro_type.avro", definition, definitionSegment, false, false); + loadData(generatedDataBasePath, "", "partition_heterogeneous.avro", definition, definitionSegment, false, + false); Collection<File> files = IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath + "/external-filter"), AVRO_FILTER); diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java index 60f4b83b62..034c430297 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java @@ -22,7 +22,9 @@ package org.apache.asterix.test.external_dataset.avro; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.avro.Schema; @@ -36,12 +38,14 @@ import org.junit.Test; public class AvroFileExampleGeneratorUtil { private static final String SCHEMA_STRING = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"SimpleRecord\",\n" + " \"namespace\": \"com.example\",\n" + " \"fields\": [\n" + " {\n" - + " \"name\": \"unionField\",\n" + " \"type\": [\"int\", \"string\", \"bytes\"],\n" - + " \"doc\": \"This field can be either an int or a string.\"\n" + " },\n" + " {\n" - + " \"name\": \"mapField\",\n" + " \"type\": {\n" + " \"type\": \"map\",\n" - + " \"values\": \"int\",\n" + " \"doc\": \"This is a map of string keys to int values.\"\n" - + " },\n" + " \"doc\": \"This field represents a map with string keys and integer values.\"\n" - + " },\n" + " {\n" + " \"name\": \"nestedRecord\",\n" + " \"type\": {\n" + + " \"name\": \"unionField\",\n" + " \"type\": [\"int\", \"string\", \"bytes\", {\n" + + " \"type\": \"map\",\n" + " \"values\": \"int\"\n" + " }, {\n" + + " \"type\": \"array\",\n" + " \"items\": \"string\"\n" + " }],\n" + + " \"doc\": \"This field can be an int, a map of int values, or an array of strings.\"\n" + " },\n" + + " {\n" + " \"name\": \"mapField\",\n" + " \"type\": {\n" + " \"type\": \"map\",\n" + + " \"values\": \"int\"\n" + " },\n" + + " \"doc\": \"This field represents a map with string keys and integer values.\"\n" + " },\n" + + " {\n" + " \"name\": \"nestedRecord\",\n" + " \"type\": {\n" + " \"type\": \"record\",\n" + " \"name\": \"NestedRecord\",\n" + " \"fields\": [\n" + " {\n" + " \"name\": \"nestedInt\",\n" + " \"type\": \"int\"\n" + " },\n" + " {\n" + " \"name\": \"nestedString\",\n" @@ -59,7 +63,7 @@ public class AvroFileExampleGeneratorUtil { + " \"name\": \"bytesField\",\n" + " \"type\": \"bytes\",\n" + " \"doc\": \"This is a bytes field.\"\n" + " },\n" + " {\n" + " \"name\": \"stringField\",\n" + " \"type\": \"string\",\n" - + " \"doc\": \"This is a string field.\"\n" + " }\n" + " ]\n" + "}\n"; + + " \"doc\": \"This is a string field.\"\n" + " }\n" + " ]\n" + "}"; private static final String AVRO_GEN_BASEDIR = "target/generated_avro_files"; private static final String FILE_NAME = "avro_type.avro"; @@ -111,6 +115,43 @@ public class AvroFileExampleGeneratorUtil { record2.put("bytesField", ByteBuffer.wrap(new byte[] { 0x06, 0x04 })); record2.put("stringField", "Sample Values"); dataFileWriter.append(record2); + + //Third record to be added + GenericRecord record3 = new GenericData.Record(schema); + record3.put("unionField", map2); + Map<String, Integer> map3 = new HashMap<>(); + map3.put("key4", 121); + map3.put("key5", 45); + record3.put("mapField", map3); + record3.put("nestedRecord", nestedRecord); + record3.put("booleanField", false); + record3.put("intField", 53344); + record3.put("longField", 60L); + record3.put("floatField", 137.62f); + record3.put("doubleField", 5.77777); + record3.put("bytesField", ByteBuffer.wrap(new byte[] { 0x02, 0x02 })); + record3.put("stringField", "Third Example"); + dataFileWriter.append(record3); + + //Fourth record to be added + GenericRecord record4 = new GenericData.Record(schema); + List<String> arrayField = new ArrayList<>(); + arrayField.add("value1"); + arrayField.add("value2"); + record4.put("unionField", arrayField); + Map<String, Integer> map4 = new HashMap<>(); + map4.put("key6", 112); + map4.put("key7", 548); + record4.put("mapField", map4); + record4.put("nestedRecord", nestedRecord); + record4.put("booleanField", true); + record4.put("intField", 544); + record4.put("longField", 62L); + record4.put("floatField", 137.62f); + record4.put("doubleField", 51.7787); + record4.put("bytesField", ByteBuffer.wrap(new byte[] { 0x02, 0x02 })); + record4.put("stringField", "Fourth Example"); + dataFileWriter.append(record4); } catch (IOException e) { System.err.println("Failed to write AVRO file: " + e.getMessage()); e.printStackTrace(); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp index 65a2b38850..8c00ded11a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp @@ -38,4 +38,13 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter% ("definition"="avro-data/reviews"), ("include"="*avro_type.avro"), ("format" = "avro") -); \ No newline at end of file +); + +CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter% +( + %template%, + ("container"="playground"), + ("definition"="avro-data/reviews"), + ("include"="*partition_heterogeneous.avro"), + ("format" = "avro") +); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.03.query.sqlpp similarity index 68% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.03.query.sqlpp index 65a2b38850..a7ccec064f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.03.query.sqlpp @@ -17,25 +17,14 @@ * under the License. */ /* -* Description : Field access pushdown -* Expected Res : Success -* Date : Feb 23rd 2024 -*/ - -DROP DATAVERSE test IF EXISTS; -CREATE DATAVERSE test; + * Description : Request All fields + * Expected Res : Success + * Date : Dec 2nd 2024 + */ USE test; -CREATE TYPE AvroType as { -}; - -CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter% -( - %template%, - ("container"="playground"), - ("definition"="avro-data/reviews"), - ("include"="*avro_type.avro"), - ("format" = "avro") -); \ No newline at end of file +SELECT VALUE active +FROM AvroDataset2 +ORDER BY AvroDataset2.id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp index ce5eb8a257..c7dcf7a330 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp @@ -36,6 +36,6 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter% %template%, ("container"="playground"), ("definition"="avro-data/reviews"), - ("include"="*heterogeneous*"), + ("include"="*heterogeneous_*"), ("format" = "avro") ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm index 92d5ea1da8..e3ad7cddc8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm @@ -1,2 +1,4 @@ [ { "key": "key1", "value": 1 }, { "key": "key2", "value": 2 } ] [ { "key": "key3", "value": 3 }, { "key": "key4", "value": 4 } ] +[ { "key": "key5", "value": 45 }, { "key": "key4", "value": 121 } ] +[ { "key": "key6", "value": 112 }, { "key": "key7", "value": 548 } ] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm index 73b283a1ee..6633f00e3f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm @@ -1,2 +1,4 @@ { "key": "key1", "value": 1 } -{ "key": "key3", "value": 3 } \ No newline at end of file +{ "key": "key3", "value": 3 } +{ "key": "key5", "value": 45 } +{ "key": "key6", "value": 112 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm index db09d84496..6882813d8b 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm @@ -1,2 +1,4 @@ { "nestedInt": 100, "nestedString": "Inside Nested" } -{ "nestedInt": 100, "nestedString": "Inside Nested" } \ No newline at end of file +{ "nestedInt": 100, "nestedString": "Inside Nested" } +{ "nestedInt": 100, "nestedString": "Inside Nested" } +{ "nestedInt": 100, "nestedString": "Inside Nested" } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm index 4eb537c528..c5079fa2cf 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm @@ -1,2 +1,4 @@ 100 -100 \ No newline at end of file +100 +100 +100 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm index d25232800f..e50fb217e0 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm @@ -1,2 +1,4 @@ true -false \ No newline at end of file +false +false +true diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm index 8a642e5efc..d1a93287f2 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm @@ -1,2 +1,4 @@ { "intField": 32, "longField": 64, "floatField": 1.0, "doubleField": 2.0 } -{ "intField": 54, "longField": 60, "floatField": 3.6, "doubleField": 5.77777 } \ No newline at end of file +{ "intField": 54, "longField": 60, "floatField": 3.5999999046325684, "doubleField": 5.77777 } +{ "intField": 53344, "longField": 60, "floatField": 137.6199951171875, "doubleField": 5.77777 } +{ "intField": 544, "longField": 62, "floatField": 137.6199951171875, "doubleField": 51.7787 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm index 8fd021217b..464a7b9495 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm @@ -1,2 +1,4 @@ 42 hex("0105") +[ { "key": "key3", "value": 3 }, { "key": "key4", "value": 4 } ] +[ "value1", "value2" ] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.03.adm new file mode 100644 index 0000000000..4a25d6c5f6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.03.adm @@ -0,0 +1,100 @@ +[ true, false ] +1 +{ "status": "active" } +[ true, false ] +"yes" +true +1 +{ "status": "active" } +1 +true +true +false +0 +[ true, false ] +1 +"active" +"active" +[ true, false ] +"active" +"no" +"yes" +{ "status": "active" } +1 +{ "status": "active" } +0 +{ "status": "active" } +false +"no" +"yes" +"active" +false +[ true, false ] +true +true +"active" +0 +true +"active" +[ true, false ] +"yes" +{ "status": "active" } +false +"no" +1 +true +{ "status": "active" } +0 +0 +"no" +"no" +0 +0 +true +"no" +false +true +{ "status": "active" } +true +0 +1 +"active" +"no" +{ "status": "active" } +1 +[ true, false ] +"no" +false +true +0 +1 +1 +1 +{ "status": "active" } +true +true +0 +{ "status": "active" } +"no" +"yes" +"yes" +"active" +"no" +false +"no" +0 +"yes" +1 +[ true, false ] +"yes" +0 +true +"active" +true +"no" +0 +true +"active" +"yes" +"active" +"yes" diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java index 8088daa307..c3563ccbbb 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java @@ -43,6 +43,7 @@ import org.apache.asterix.om.pointables.base.DefaultOpenFieldType; import org.apache.asterix.om.types.ATypeTag; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericArray; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.hyracks.api.exceptions.HyracksDataException; @@ -180,6 +181,10 @@ public class AvroDataParser extends AbstractDataParser implements IRecordDataPar return value instanceof ByteBuffer; case RECORD: return value instanceof GenericData.Record; + case ARRAY: + return value instanceof GenericArray; + case MAP: + return value instanceof Map; default: return false; }
