This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 641424faebcdd39345b496e8bf83d869c40e458f
Author: ayush.tripathi <[email protected]>
AuthorDate: Mon Dec 2 21:03:02 2024 +0530

    [ASTERIXDB-3353][EXT] Fix handling of arrays in Avro union types
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Resolves issues in the Avro data parser when processing union types containing arrays and maps.
    
    Ext-ref: MB-64467
    Change-Id: Icbb42892984a2c30d8c3faa4dfbf6d16f95f61cc
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19167
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Ayush Tripathi <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
---
 .../data/hdfs/parquet/partition_heterogeneous.json | 100 +++++++++++++++++++++
 .../external_dataset/ExternalDatasetTestUtils.java |   2 +
 .../avro/AvroFileExampleGeneratorUtil.java         |  55 ++++++++++--
 .../avro-types/avro-union/avro-union.01.ddl.sqlpp  |  11 ++-
 ...nion.01.ddl.sqlpp => avro-union.03.query.sqlpp} |  25 ++----
 .../heterogeneous-access.1.ddl.sqlpp               |   2 +-
 .../avro/avro-types/avro-map/avro-map.02.adm       |   2 +
 .../avro/avro-types/avro-map/avro-map.03.adm       |   4 +-
 .../avro-nested-records/avro-nested-records.02.adm |   4 +-
 .../avro-nested-records/avro-nested-records.03.adm |   4 +-
 .../avro-primitives/avro-primitives.02.adm         |   4 +-
 .../avro-primitives/avro-primitives.03.adm         |   4 +-
 .../avro/avro-types/avro-union/avro-union.02.adm   |   2 +
 .../avro/avro-types/avro-union/avro-union.03.adm   | 100 +++++++++++++++++++++
 .../asterix/external/parser/AvroDataParser.java    |   5 ++
 15 files changed, 292 insertions(+), 32 deletions(-)

diff --git 
a/asterixdb/asterix-app/data/hdfs/parquet/partition_heterogeneous.json 
b/asterixdb/asterix-app/data/hdfs/parquet/partition_heterogeneous.json
new file mode 100644
index 0000000000..e6179d600c
--- /dev/null
+++ b/asterixdb/asterix-app/data/hdfs/parquet/partition_heterogeneous.json
@@ -0,0 +1,100 @@
+{"id": 1, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 2, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 3, "partitioner_key": "C", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 4, "partitioner_key": "C", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 5, "partitioner_key": "A", "name": {"first": "Virat", "last": "Kohli"}, 
"randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 6, "partitioner_key": "C", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 7, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 8, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 9, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 10, "partitioner_key": "B", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 11, "partitioner_key": "B", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 12, "partitioner_key": "B", "name": "None", "randomField": "5678", 
"active": false, "price": {"currency": "INR", "value": 99.99}}
+{"id": 13, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 14, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 15, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 16, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 17, "partitioner_key": "B", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 18, "partitioner_key": "C", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 19, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 20, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 21, "partitioner_key": "B", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 22, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 23, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 24, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 25, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 26, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 27, "partitioner_key": "A", "name": "None", "randomField": "5678", 
"active": false, "price": {"currency": "INR", "value": 99.99}}
+{"id": 28, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 29, "partitioner_key": "C", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 30, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 31, "partitioner_key": "C", "name": "None", "randomField": "5678", 
"active": false, "price": {"currency": "INR", "value": 99.99}}
+{"id": 32, "partitioner_key": "B", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 33, "partitioner_key": "C", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 34, "partitioner_key": "B", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 35, "partitioner_key": "B", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 36, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 37, "partitioner_key": "B", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 38, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 39, "partitioner_key": "B", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 40, "partitioner_key": "C", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 41, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 42, "partitioner_key": "A", "name": "None", "randomField": "5678", 
"active": false, "price": {"currency": "INR", "value": 99.99}}
+{"id": 43, "partitioner_key": "C", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 44, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 45, "partitioner_key": "A", "name": "Virat Kohli", "randomField": 1234, 
"active": true, "price": 99.99}
+{"id": 46, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 47, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 48, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 49, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 50, "partitioner_key": "C", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 51, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 52, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 53, "partitioner_key": "C", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 54, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 55, "partitioner_key": "A", "name": "None", "randomField": "5678", 
"active": false, "price": {"currency": "INR", "value": 99.99}}
+{"id": 56, "partitioner_key": "B", "name": "Virat Kohli", "randomField": 1234, 
"active": true, "price": 99.99}
+{"id": 57, "partitioner_key": "B", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 58, "partitioner_key": "B", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 59, "partitioner_key": "A", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 60, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 61, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 62, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 63, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 64, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 65, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 66, "partitioner_key": "B", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 67, "partitioner_key": "B", "name": "None", "randomField": "5678", 
"active": false, "price": {"currency": "INR", "value": 99.99}}
+{"id": 68, "partitioner_key": "C", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 69, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 70, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 71, "partitioner_key": "A", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 72, "partitioner_key": "B", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 73, "partitioner_key": "C", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 74, "partitioner_key": "C", "name": "Virat Kohli", "randomField": 1234, 
"active": true, "price": 99.99}
+{"id": 75, "partitioner_key": "A", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 76, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 77, "partitioner_key": "A", "name": false, "randomField": [1, 2, 3], 
"active": {"status": "active"}, "price": "one hundred"}
+{"id": 78, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 79, "partitioner_key": "C", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 80, "partitioner_key": "A", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 81, "partitioner_key": "C", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 82, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 83, "partitioner_key": "A", "name": "None", "randomField": "5678", 
"active": false, "price": {"currency": "INR", "value": 99.99}}
+{"id": 84, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 85, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 86, "partitioner_key": "C", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 87, "partitioner_key": "C", "name": ["Kohli", "Dhoni"], "randomField": 
18, "active": 1, "price": {"amount": 99.99, "currency": "USD"}}
+{"id": 88, "partitioner_key": "A", "name": {"nickname": "VK"}, "randomField": 
5678, "active": [true, false], "price": 100}
+{"id": 89, "partitioner_key": "C", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 90, "partitioner_key": "C", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 91, "partitioner_key": "A", "name": "Virat Kohli", "randomField": 1234, 
"active": true, "price": 99.99}
+{"id": 92, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 93, "partitioner_key": "B", "name": 3456, "randomField": {"value": 
7890}, "active": true, "price": "101.50"}
+{"id": 94, "partitioner_key": "A", "name": {"first": "MS", "last": "Dhoni"}, 
"randomField": [9, 8, 7], "active": "no", "price": 101.5}
+{"id": 95, "partitioner_key": "B", "name": ["Dhoni", "Kohli"], "randomField": 
"None", "active": 0, "price": 50}
+{"id": 96, "partitioner_key": "B", "name": "Virat Kohli", "randomField": 1234, 
"active": true, "price": 99.99}
+{"id": 97, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 98, "partitioner_key": "B", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
+{"id": 99, "partitioner_key": "A", "name": 9876, "randomField": {"identifier": 
"ORD1234", "type": "order"}, "active": "active", "price": [99, 100, 101]}
+{"id": 100, "partitioner_key": "C", "name": {"first": "Virat", "last": 
"Kohli"}, "randomField": "1234", "active": "yes", "price": "99.99"}
diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index 07789b764f..c50b391fa2 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -418,6 +418,8 @@ public class ExternalDatasetTestUtils {
         loadData(generatedDataBasePath, "", "heterogeneous_1.avro", 
definition, definitionSegment, false, false);
         loadData(generatedDataBasePath, "", "heterogeneous_2.avro", 
definition, definitionSegment, false, false);
         loadData(generatedDataBasePath, "", "avro_type.avro", definition, 
definitionSegment, false, false);
+        loadData(generatedDataBasePath, "", "partition_heterogeneous.avro", 
definition, definitionSegment, false,
+                false);
 
         Collection<File> files =
                 IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath + 
"/external-filter"), AVRO_FILTER);
diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java
index 60f4b83b62..034c430297 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java
@@ -22,7 +22,9 @@ package org.apache.asterix.test.external_dataset.avro;
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.avro.Schema;
@@ -36,12 +38,14 @@ import org.junit.Test;
 public class AvroFileExampleGeneratorUtil {
     private static final String SCHEMA_STRING = "{\n" + "  \"type\": 
\"record\",\n" + "  \"name\": \"SimpleRecord\",\n"
             + "  \"namespace\": \"com.example\",\n" + "  \"fields\": [\n" + "  
  {\n"
-            + "      \"name\": \"unionField\",\n" + "      \"type\": [\"int\", 
\"string\", \"bytes\"],\n"
-            + "      \"doc\": \"This field can be either an int or a 
string.\"\n" + "    },\n" + "    {\n"
-            + "      \"name\": \"mapField\",\n" + "      \"type\": {\n" + "    
    \"type\": \"map\",\n"
-            + "        \"values\": \"int\",\n" + "        \"doc\": \"This is a 
map of string keys to int values.\"\n"
-            + "      },\n" + "      \"doc\": \"This field represents a map 
with string keys and integer values.\"\n"
-            + "    },\n" + "    {\n" + "      \"name\": \"nestedRecord\",\n" + 
"      \"type\": {\n"
+            + "      \"name\": \"unionField\",\n" + "      \"type\": [\"int\", 
\"string\", \"bytes\", {\n"
+            + "        \"type\": \"map\",\n" + "        \"values\": \"int\"\n" 
+ "      }, {\n"
+            + "        \"type\": \"array\",\n" + "        \"items\": 
\"string\"\n" + "      }],\n"
+            + "      \"doc\": \"This field can be an int, a map of int values, 
or an array of strings.\"\n" + "    },\n"
+            + "    {\n" + "      \"name\": \"mapField\",\n" + "      \"type\": 
{\n" + "        \"type\": \"map\",\n"
+            + "        \"values\": \"int\"\n" + "      },\n"
+            + "      \"doc\": \"This field represents a map with string keys 
and integer values.\"\n" + "    },\n"
+            + "    {\n" + "      \"name\": \"nestedRecord\",\n" + "      
\"type\": {\n"
             + "        \"type\": \"record\",\n" + "        \"name\": 
\"NestedRecord\",\n" + "        \"fields\": [\n"
             + "          {\n" + "            \"name\": \"nestedInt\",\n" + "   
         \"type\": \"int\"\n"
             + "          },\n" + "          {\n" + "            \"name\": 
\"nestedString\",\n"
@@ -59,7 +63,7 @@ public class AvroFileExampleGeneratorUtil {
             + "      \"name\": \"bytesField\",\n" + "      \"type\": 
\"bytes\",\n"
             + "      \"doc\": \"This is a bytes field.\"\n" + "    },\n" + "   
 {\n"
             + "      \"name\": \"stringField\",\n" + "      \"type\": 
\"string\",\n"
-            + "      \"doc\": \"This is a string field.\"\n" + "    }\n" + "  
]\n" + "}\n";
+            + "      \"doc\": \"This is a string field.\"\n" + "    }\n" + "  
]\n" + "}";
 
     private static final String AVRO_GEN_BASEDIR = 
"target/generated_avro_files";
     private static final String FILE_NAME = "avro_type.avro";
@@ -111,6 +115,43 @@ public class AvroFileExampleGeneratorUtil {
             record2.put("bytesField", ByteBuffer.wrap(new byte[] { 0x06, 0x04 
}));
             record2.put("stringField", "Sample Values");
             dataFileWriter.append(record2);
+
+            //Third record to be added
+            GenericRecord record3 = new GenericData.Record(schema);
+            record3.put("unionField", map2);
+            Map<String, Integer> map3 = new HashMap<>();
+            map3.put("key4", 121);
+            map3.put("key5", 45);
+            record3.put("mapField", map3);
+            record3.put("nestedRecord", nestedRecord);
+            record3.put("booleanField", false);
+            record3.put("intField", 53344);
+            record3.put("longField", 60L);
+            record3.put("floatField", 137.62f);
+            record3.put("doubleField", 5.77777);
+            record3.put("bytesField", ByteBuffer.wrap(new byte[] { 0x02, 0x02 
}));
+            record3.put("stringField", "Third Example");
+            dataFileWriter.append(record3);
+
+            //Fourth record to be added
+            GenericRecord record4 = new GenericData.Record(schema);
+            List<String> arrayField = new ArrayList<>();
+            arrayField.add("value1");
+            arrayField.add("value2");
+            record4.put("unionField", arrayField);
+            Map<String, Integer> map4 = new HashMap<>();
+            map4.put("key6", 112);
+            map4.put("key7", 548);
+            record4.put("mapField", map4);
+            record4.put("nestedRecord", nestedRecord);
+            record4.put("booleanField", true);
+            record4.put("intField", 544);
+            record4.put("longField", 62L);
+            record4.put("floatField", 137.62f);
+            record4.put("doubleField", 51.7787);
+            record4.put("bytesField", ByteBuffer.wrap(new byte[] { 0x02, 0x02 
}));
+            record4.put("stringField", "Fourth Example");
+            dataFileWriter.append(record4);
         } catch (IOException e) {
             System.err.println("Failed to write AVRO file: " + e.getMessage());
             e.printStackTrace();
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
index 65a2b38850..8c00ded11a 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
@@ -38,4 +38,13 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
   ("definition"="avro-data/reviews"),
   ("include"="*avro_type.avro"),
   ("format" = "avro")
-);
\ No newline at end of file
+);
+
+CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
+(
+  %template%,
+  ("container"="playground"),
+  ("definition"="avro-data/reviews"),
+  ("include"="*partition_heterogeneous.avro"),
+  ("format" = "avro")
+);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.03.query.sqlpp
similarity index 68%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.03.query.sqlpp
index 65a2b38850..a7ccec064f 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.03.query.sqlpp
@@ -17,25 +17,14 @@
  * under the License.
  */
 /*
-* Description  : Field access pushdown
-* Expected Res : Success
-* Date         : Feb 23rd 2024
-*/
-
-DROP DATAVERSE test IF EXISTS;
-CREATE DATAVERSE test;
+ * Description  : Request All fields
+ * Expected Res : Success
+ * Date         : Dec 2nd 2024
+ */
 
 USE test;
 
 
-CREATE TYPE AvroType as {
-};
-
-CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
-(
-  %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
-  ("include"="*avro_type.avro"),
-  ("format" = "avro")
-);
\ No newline at end of file
+SELECT VALUE active
+FROM AvroDataset2
+ORDER BY AvroDataset2.id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
index ce5eb8a257..c7dcf7a330 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
@@ -36,6 +36,6 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
   %template%,
   ("container"="playground"),
   ("definition"="avro-data/reviews"),
-  ("include"="*heterogeneous*"),
+  ("include"="*heterogeneous_*"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm
index 92d5ea1da8..e3ad7cddc8 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm
@@ -1,2 +1,4 @@
 [ { "key": "key1", "value": 1 }, { "key": "key2", "value": 2 } ]
 [ { "key": "key3", "value": 3 }, { "key": "key4", "value": 4 } ]
+[ { "key": "key5", "value": 45 }, { "key": "key4", "value": 121 } ]
+[ { "key": "key6", "value": 112 }, { "key": "key7", "value": 548 } ]
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm
index 73b283a1ee..6633f00e3f 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm
@@ -1,2 +1,4 @@
 { "key": "key1", "value": 1 }
-{ "key": "key3", "value": 3 }
\ No newline at end of file
+{ "key": "key3", "value": 3 }
+{ "key": "key5", "value": 45 }
+{ "key": "key6", "value": 112 }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm
index db09d84496..6882813d8b 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm
@@ -1,2 +1,4 @@
 { "nestedInt": 100, "nestedString": "Inside Nested" }
-{ "nestedInt": 100, "nestedString": "Inside Nested" }
\ No newline at end of file
+{ "nestedInt": 100, "nestedString": "Inside Nested" }
+{ "nestedInt": 100, "nestedString": "Inside Nested" }
+{ "nestedInt": 100, "nestedString": "Inside Nested" }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm
index 4eb537c528..c5079fa2cf 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm
@@ -1,2 +1,4 @@
 100
-100
\ No newline at end of file
+100
+100
+100
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm
index d25232800f..e50fb217e0 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm
@@ -1,2 +1,4 @@
 true
-false
\ No newline at end of file
+false
+false
+true
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm
index 8a642e5efc..d1a93287f2 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm
@@ -1,2 +1,4 @@
 { "intField": 32, "longField": 64, "floatField": 1.0, "doubleField": 2.0 }
-{ "intField": 54, "longField": 60, "floatField": 3.6, "doubleField": 5.77777 }
\ No newline at end of file
+{ "intField": 54, "longField": 60, "floatField": 3.5999999046325684, 
"doubleField": 5.77777 }
+{ "intField": 53344, "longField": 60, "floatField": 137.6199951171875, 
"doubleField": 5.77777 }
+{ "intField": 544, "longField": 62, "floatField": 137.6199951171875, 
"doubleField": 51.7787 }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm
index 8fd021217b..464a7b9495 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm
@@ -1,2 +1,4 @@
 42
 hex("0105")
+[ { "key": "key3", "value": 3 }, { "key": "key4", "value": 4 } ]
+[ "value1", "value2" ]
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.03.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.03.adm
new file mode 100644
index 0000000000..4a25d6c5f6
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.03.adm
@@ -0,0 +1,100 @@
+[ true, false ]
+1
+{ "status": "active" }
+[ true, false ]
+"yes"
+true
+1
+{ "status": "active" }
+1
+true
+true
+false
+0
+[ true, false ]
+1
+"active"
+"active"
+[ true, false ]
+"active"
+"no"
+"yes"
+{ "status": "active" }
+1
+{ "status": "active" }
+0
+{ "status": "active" }
+false
+"no"
+"yes"
+"active"
+false
+[ true, false ]
+true
+true
+"active"
+0
+true
+"active"
+[ true, false ]
+"yes"
+{ "status": "active" }
+false
+"no"
+1
+true
+{ "status": "active" }
+0
+0
+"no"
+"no"
+0
+0
+true
+"no"
+false
+true
+{ "status": "active" }
+true
+0
+1
+"active"
+"no"
+{ "status": "active" }
+1
+[ true, false ]
+"no"
+false
+true
+0
+1
+1
+1
+{ "status": "active" }
+true
+true
+0
+{ "status": "active" }
+"no"
+"yes"
+"yes"
+"active"
+"no"
+false
+"no"
+0
+"yes"
+1
+[ true, false ]
+"yes"
+0
+true
+"active"
+true
+"no"
+0
+true
+"active"
+"yes"
+"active"
+"yes"
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
index 8088daa307..c3563ccbbb 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
@@ -43,6 +43,7 @@ import 
org.apache.asterix.om.pointables.base.DefaultOpenFieldType;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericArray;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -180,6 +181,10 @@ public class AvroDataParser extends AbstractDataParser 
implements IRecordDataPar
                 return value instanceof ByteBuffer;
             case RECORD:
                 return value instanceof GenericData.Record;
+            case ARRAY:
+                return value instanceof GenericArray;
+            case MAP:
+                return value instanceof Map;
             default:
                 return false;
         }

Reply via email to