This is an automated email from the ASF dual-hosted git repository.
wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new b2a2724072 [ASTERIXDB-3353][EXT] Fixing avro data parser for union
types , adding testcases.
b2a2724072 is described below
commit b2a27240722c0b11b48375b8e099f56aa8d6eb86
Author: ayush-couchbase <[email protected]>
AuthorDate: Fri Feb 23 13:04:47 2024 +0530
[ASTERIXDB-3353][EXT] Fixing avro data parser for union types , adding
testcases.
- user model changes: no
- storage format changes: no
- interface changes: yes
Details:
Fixes the Avro data parser for union types and adds test cases.
Change-Id: I7bdaec92c4a69e807017d355023d4d46d92a3b95
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18178
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Wail Alkowaileet <[email protected]>
---
.../asterix/app/translator/QueryTranslator.java | 1 +
.../external_dataset/ExternalDatasetTestUtils.java | 49 +++++++++
.../avro/AvroFileConverterUtil.java | 111 +++++++++++++++++++
.../avro/AvroFileExampleGeneratorUtil.java | 118 +++++++++++++++++++++
.../aws/AwsS3ExternalDatasetTest.java | 4 +
.../AzureBlobStorageExternalDatasetTest.java | 2 +
.../avro/array-access/array-access.01.ddl.sqlpp | 41 +++++++
.../avro/array-access/array-access.02.query.sqlpp | 30 ++++++
.../avro/array-access/array-access.03.query.sqlpp | 31 ++++++
.../avro/array-access/array-access.04.query.sqlpp | 31 ++++++
.../avro/array-access/array-access.05.query.sqlpp | 31 ++++++
.../avro/array-access/array-access.06.query.sqlpp | 31 ++++++
.../avro/avro-types/avro-map/avro-map.01.ddl.sqlpp | 41 +++++++
.../avro-types/avro-map/avro-map.02.query.sqlpp | 30 ++++++
.../avro-types/avro-map/avro-map.03.query.sqlpp | 29 +++++
.../avro-nested-records.01.ddl.sqlpp | 41 +++++++
.../avro-nested-records.02.query.sqlpp | 30 ++++++
.../avro-nested-records.03.query.sqlpp | 30 ++++++
.../avro-primitives/avro-primitives.01.ddl.sqlpp | 41 +++++++
.../avro-primitives/avro-primitives.02.query.sqlpp | 30 ++++++
.../avro-primitives/avro-primitives.03.query.sqlpp | 30 ++++++
.../avro-types/avro-union/avro-union.01.ddl.sqlpp | 41 +++++++
.../avro-union/avro-union.02.query.sqlpp | 30 ++++++
.../avro/field-access/field-access.01.ddl.sqlpp | 68 ++++++++++++
.../avro/field-access/field-access.02.query.sqlpp | 30 ++++++
.../avro/field-access/field-access.03.query.sqlpp | 31 ++++++
.../avro/field-access/field-access.04.query.sqlpp | 32 ++++++
.../avro/field-access/field-access.05.query.sqlpp | 30 ++++++
.../avro/field-access/field-access.06.query.sqlpp | 30 ++++++
.../heterogeneous-access.1.ddl.sqlpp | 41 +++++++
.../heterogeneous-access.2.query.sqlpp | 35 ++++++
.../avro/invalid-avro-files/test.000.ddl.sqlpp | 34 ++++++
.../avro/invalid-avro-files/test.001.query.sqlpp | 23 ++++
.../avro/invalid-avro-files/test.999.ddl.sqlpp | 20 ++++
.../avro/invalid-type/invalid-type.1.ddl.sqlpp | 42 ++++++++
.../avro/missing-fields/missing-fields.1.ddl.sqlpp | 41 +++++++
.../missing-fields/missing-fields.2.query.sqlpp | 27 +++++
.../multi-file-multi-schema.1.ddl.sqlpp | 52 +++++++++
.../multi-file-multi-schema.2.query.sqlpp | 28 +++++
.../multi-file-multi-schema.3.query.sqlpp | 30 ++++++
.../multi-file-multi-schema.4.query.sqlpp | 30 ++++++
.../common/avro/no-files/no-files.1.ddl.sqlpp | 42 ++++++++
.../common/avro/no-files/no-files.2.query.sqlpp | 30 ++++++
.../avro/object-concat/object-concat.1.ddl.sqlpp | 41 +++++++
.../avro/object-concat/object-concat.2.query.sqlpp | 29 +++++
.../select-all-fields.1.ddl.sqlpp | 41 +++++++
.../select-all-fields.2.query.sqlpp | 28 +++++
.../select-count-one-field.1.ddl.sqlpp | 41 +++++++
.../select-count-one-field.2.query.sqlpp | 27 +++++
.../string-standard-utf8.1.ddl.sqlpp | 41 +++++++
.../string-standard-utf8.2.query.sqlpp | 29 +++++
.../avro/type-mismatch/type-mismatch.1.ddl.sqlpp | 41 +++++++
.../avro/type-mismatch/type-mismatch.2.query.sqlpp | 31 ++++++
.../avro/type-mismatch/type-mismatch.3.query.sqlpp | 30 ++++++
.../common/avro/array-access/array-access.02.adm | 2 +
.../common/avro/array-access/array-access.03.adm | 1 +
.../common/avro/array-access/array-access.04.adm | 1 +
.../common/avro/array-access/array-access.05.adm | 1 +
.../common/avro/array-access/array-access.06.adm | 2 +
.../avro/avro-types/avro-map/avro-map.02.adm | 2 +
.../avro/avro-types/avro-map/avro-map.03.adm | 2 +
.../avro-nested-records/avro-nested-records.02.adm | 2 +
.../avro-nested-records/avro-nested-records.03.adm | 2 +
.../avro-primitives/avro-primitives.02.adm | 2 +
.../avro-primitives/avro-primitives.03.adm | 2 +
.../avro/avro-types/avro-union/avro-union.02.adm | 2 +
.../common/avro/field-access/field-access.02.adm | 7 ++
.../common/avro/field-access/field-access.03.adm | 7 ++
.../common/avro/field-access/field-access.04.adm | 1 +
.../common/avro/field-access/field-access.05.adm | 2 +
.../common/avro/field-access/field-access.06.adm | 1 +
.../heterogeneous-access.02.adm | 6 ++
.../avro/missing-fields/missing-fields.2.adm | 2 +
.../multi-file-multi-schema.2.adm | 14 +++
.../multi-file-multi-schema.3.adm | 14 +++
.../multi-file-multi-schema.4.adm | 14 +++
.../common/avro/no-files/no-files.02.adm | 1 +
.../common/avro/object-concat/object-concat.2.adm | 2 +
.../avro/select-all-fields/select-all-fields.2.adm | 2 +
.../select-count-one-field.2.adm | 1 +
.../string-standard-utf8.2.adm | 2 +
.../common/avro/type-mismatch/type-mismatch.02.adm | 2 +
.../common/avro/type-mismatch/type-mismatch.03.adm | 2 +
.../runtimets/testsuite_external_dataset_s3.xml | 105 ++++++++++++++++++
.../asterix/common/exceptions/ErrorCode.java | 2 +
.../src/main/resources/asx_errormsg/en.properties | 1 +
.../asterix/external/parser/AvroDataParser.java | 73 ++++++++++++-
.../asterix/external/util/ExternalDataUtils.java | 13 +++
88 files changed, 2218 insertions(+), 5 deletions(-)
diff --git
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index f20a03194d..6424280bbd 100644
---
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -1211,6 +1211,7 @@ public class QueryTranslator extends
AbstractLangTranslator implements IStatemen
ExternalDetailsDecl externalDetails = (ExternalDetailsDecl)
dd.getDatasetDetailsDecl();
Map<String, String> properties = externalDetails.getProperties();
ExternalDataUtils.validateParquetTypeAndConfiguration(properties,
(ARecordType) itemType.getDatatype());
+ ExternalDataUtils.validateAvroTypeAndConfiguration(properties,
(ARecordType) itemType.getDatatype());
return properties;
}
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index 90f46ad51c..55a515c879 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -18,6 +18,7 @@
*/
package org.apache.asterix.test.external_dataset;
+import static
org.apache.asterix.test.external_dataset.avro.AvroFileConverterUtil.AVRO_GEN_BASEDIR;
import static
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.BOM_FILE_CONTAINER;
import static
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.DYNAMIC_PREFIX_AT_START_CONTAINER;
import static
org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.FIXED_DATA_CONTAINER;
@@ -33,6 +34,7 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
+import org.apache.asterix.test.external_dataset.avro.AvroFileConverterUtil;
import
org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil;
import org.apache.asterix.testframework.context.TestCaseContext;
import org.apache.commons.io.FilenameUtils;
@@ -51,6 +53,7 @@ public class ExternalDatasetTestUtils {
private static final FilenameFilter JSON_FILTER = ((dir, name) ->
name.endsWith(".json"));
private static final FilenameFilter CSV_FILTER = ((dir, name) ->
name.endsWith(".csv"));
private static final FilenameFilter PARQUET_FILTER = ((dir, name) ->
name.endsWith(".parquet"));
+ private static final FilenameFilter AVRO_FILTER = ((dir, name) ->
name.endsWith(".avro"));
// Base directory paths for data files
private static String JSON_DATA_PATH;
@@ -64,6 +67,7 @@ public class ExternalDatasetTestUtils {
public static final String TSV_DEFINITION = "tsv-data/reviews/";
public static final String MIXED_DEFINITION = "mixed-data/reviews/";
public static final String PARQUET_DEFINITION = "parquet-data/reviews/";
+ public static final String AVRO_DEFINITION = "avro-data/reviews/";
// This is used for a test to generate over 1000 number of files
public static final String OVER_1000_OBJECTS_PATH = "over-1000-objects";
@@ -101,6 +105,13 @@ public class ExternalDatasetTestUtils {
BinaryFileConverterUtil.convertToParquet(basePath, parquetRawJsonDir,
BINARY_GEN_BASEDIR);
}
+ public static void createAvroFiles(String avroRawJsonDir) throws
IOException {
+ File basePath = new File(".");
+ // cleaning directory
+ BinaryFileConverterUtil.cleanBinaryDirectory(basePath,
AVRO_GEN_BASEDIR);
+ AvroFileConverterUtil.convertToAvro(basePath, avroRawJsonDir,
AVRO_GEN_BASEDIR);
+ }
+
/**
* Generate binary files (e.g., parquet files)
*/
@@ -114,6 +125,14 @@ public class ExternalDatasetTestUtils {
JSON_FILTER, startIndex);
}
+ public static void createAvroFilesRecursively(String dataToConvertDirPath)
throws IOException {
+ //base path
+ File basePath = new File(".");
+ int startIndex = dataToConvertDirPath.indexOf("/external-filter");
+ AvroFileConverterUtil.convertToAvroRecursively(basePath,
dataToConvertDirPath, AVRO_GEN_BASEDIR, JSON_FILTER,
+ startIndex);
+ }
+
public static void setDataPaths(String jsonDataPath, String csvDataPath,
String tsvDataPath) {
JSON_DATA_PATH = jsonDataPath;
CSV_DATA_PATH = csvDataPath;
@@ -157,6 +176,10 @@ public class ExternalDatasetTestUtils {
loadParquetFiles();
LOGGER.info("Parquet files added successfully");
+ LOGGER.info("Adding Avro files to the bucket");
+ loadAvroFiles();
+ LOGGER.info("Avro files added successfully");
+
LOGGER.info("Files added successfully");
}
@@ -323,11 +346,37 @@ public class ExternalDatasetTestUtils {
IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath +
"/external-filter"), PARQUET_FILTER);
for (File file : files) {
String fileName = file.getName();
+ String fileParent = file.getParent();
String externalFilterDefinition =
file.getParent().substring(generatedDataBasePath.length() + 1) + "/";
loadData(file.getParent(), "", fileName, "parquet-data/" +
externalFilterDefinition, "", false, false);
}
}
+ private static void loadAvroFiles() {
+ String generatedDataBasePath = AVRO_GEN_BASEDIR;
+ String definition = AVRO_DEFINITION;
+ String definitionSegment = "";
+
+ loadData(generatedDataBasePath, "", "dummy_tweet.avro", definition,
definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "id_age.avro", definition,
definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "id_age-string.avro", definition,
definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "id_name.avro", definition,
definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "id_name_comment.avro",
definition, definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "heterogeneous_1.avro",
definition, definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "heterogeneous_2.avro",
definition, definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "avro_type.avro", definition,
definitionSegment, false, false);
+
+ Collection<File> files =
+ IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath +
"/external-filter"), AVRO_FILTER);
+ for (File file : files) {
+ String fileName = file.getName();
+ String fileParent = file.getParent();
+ String externalFilterDefinition =
file.getParent().substring(generatedDataBasePath.length() + 1) + "/";
+ loadData(file.getParent(), "", fileName, "avro-data/" +
externalFilterDefinition, "", false, false);
+ }
+ return;
+ }
+
private static void loadDirectory(String dataBasePath, String rootPath,
FilenameFilter filter) {
File dir = new File(dataBasePath, rootPath);
if (!dir.exists() || !dir.isDirectory()) {
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileConverterUtil.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileConverterUtil.java
new file mode 100644
index 0000000000..db12e7b1fd
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileConverterUtil.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.test.external_dataset.avro;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.util.Collection;
+
+import org.apache.asterix.test.external_dataset.parquet.JsonUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.Path;
+import org.apache.hyracks.api.util.IoUtil;
+
+import tech.allegro.schema.json2avro.converter.JsonAvroConverter;
+
+public class AvroFileConverterUtil {
+
+ public static final String DEFAULT_PARQUET_SRC_PATH = "data/hdfs/parquet";
+ public static final String AVRO_GEN_BASEDIR = "target" +
File.separatorChar + "generated_avro_files";
+
+ //How many records should the schema inference method inspect to infer the
schema for avro files
+ private static final int NUM_OF_RECORDS_SCHEMA = 20;
+
+ private AvroFileConverterUtil() {
+ }
+
+ private static void convertToJsonAndWriteAvro(File jsonFile, Schema
schema, Path avroFilePath) throws IOException {
+ File outputFile = new File(avroFilePath.toString());
+ File parentDir = outputFile.getParentFile();
+ if (!parentDir.exists() && !parentDir.mkdirs()) {
+ throw new IOException("Failed to create directory " + parentDir);
+ }
+ GenericDatumWriter<GenericRecord> datumWriter = new
GenericDatumWriter<>(schema);
+ try (DataFileWriter<GenericRecord> dataFileWriter = new
DataFileWriter<>(datumWriter)) {
+ dataFileWriter.create(schema, new File(avroFilePath.toString()));
+ JsonAvroConverter converter = new JsonAvroConverter();
+ try (BufferedReader reader = new BufferedReader(new
FileReader(jsonFile))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ GenericRecord record =
converter.convertToGenericDataRecord(line.getBytes(), schema);
+ dataFileWriter.append(record);
+ }
+ } catch (DataFileWriter.AppendWriteException e) {
+ System.err.println("Failed to append record to Avro file: " +
e.getMessage());
+ }
+ }
+ }
+
+ public static void writeAvroFile(File jsonFile, Path avroPath) throws
IOException {
+ FileInputStream schemaInputStream = new FileInputStream(jsonFile);
+ Schema schema = JsonUtil.inferSchema(schemaInputStream, "avro_schema",
NUM_OF_RECORDS_SCHEMA);
+ convertToJsonAndWriteAvro(jsonFile, schema, avroPath);
+ }
+
+ public static void convertToAvroRecursively(File localDataRoot, String
src, String dest, FilenameFilter filter,
+ int startIndex) throws IOException {
+ File destPath = new File(localDataRoot, dest);
+
+ File dir = new File(src);
+ if (!dir.exists() || !dir.isDirectory()) {
+ return;
+ }
+
+ Collection<File> files = IoUtil.getMatchingFiles(dir.toPath(), filter);
+ for (File file : files) {
+ String fileName = file.getName().substring(0,
file.getName().indexOf(".")) + ".avro";
+ Path outputPath = new Path(
+ Paths.get(destPath.getAbsolutePath(),
file.getParent().substring(startIndex), fileName).toString());
+
+ writeAvroFile(file, outputPath);
+ }
+ }
+
+ public static void convertToAvro(File localDataRoot, String src, String
dest) throws IOException {
+ File srcPath = new File(localDataRoot, src);
+ File destPath = new File(localDataRoot, dest);
+
+ //write avro files
+ File[] listOfFiles = srcPath.listFiles();
+ for (File jsonFile : listOfFiles) {
+ String fileName = jsonFile.getName().substring(0,
jsonFile.getName().indexOf(".")) + ".avro";
+ Path outputPath = new Path(destPath.getAbsolutePath(), fileName);
+ writeAvroFile(jsonFile, outputPath);
+ }
+ AvroFileExampleGeneratorUtil.writeExample();
+ }
+}
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java
new file mode 100644
index 0000000000..d62d2d107e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/avro/AvroFileExampleGeneratorUtil.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.test.external_dataset.avro;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+
+public class AvroFileExampleGeneratorUtil {
+ private static final String SCHEMA_STRING = "{\n" + " \"type\":
\"record\",\n" + " \"name\": \"SimpleRecord\",\n"
+ + " \"namespace\": \"com.example\",\n" + " \"fields\": [\n" + "
{\n"
+ + " \"name\": \"unionField\",\n" + " \"type\": [\"int\",
\"string\"],\n"
+ + " \"doc\": \"This field can be either an int or a
string.\"\n" + " },\n" + " {\n"
+ + " \"name\": \"mapField\",\n" + " \"type\": {\n" + "
\"type\": \"map\",\n"
+ + " \"values\": \"int\",\n" + " \"doc\": \"This is a
map of string keys to int values.\"\n"
+ + " },\n" + " \"doc\": \"This field represents a map
with string keys and integer values.\"\n"
+ + " },\n" + " {\n" + " \"name\": \"nestedRecord\",\n" +
" \"type\": {\n"
+ + " \"type\": \"record\",\n" + " \"name\":
\"NestedRecord\",\n" + " \"fields\": [\n"
+ + " {\n" + " \"name\": \"nestedInt\",\n" + "
\"type\": \"int\"\n"
+ + " },\n" + " {\n" + " \"name\":
\"nestedString\",\n"
+ + " \"type\": \"string\"\n" + " }\n" + "
]\n" + " },\n"
+ + " \"doc\": \"This is a nested record.\"\n" + " },\n" + "
{\n"
+ + " \"name\": \"booleanField\",\n" + " \"type\":
\"boolean\",\n"
+ + " \"doc\": \"This is a boolean field.\"\n" + " },\n" + "
{\n"
+ + " \"name\": \"intField\",\n" + " \"type\": \"int\",\n"
+ + " \"doc\": \"This is an int field.\"\n" + " },\n" + "
{\n" + " \"name\": \"longField\",\n"
+ + " \"type\": \"long\",\n" + " \"doc\": \"This is a long
field.\"\n" + " },\n" + " {\n"
+ + " \"name\": \"floatField\",\n" + " \"type\":
\"float\",\n"
+ + " \"doc\": \"This is a float field.\"\n" + " },\n" + "
{\n"
+ + " \"name\": \"doubleField\",\n" + " \"type\":
\"double\",\n"
+ + " \"doc\": \"This is a double field.\"\n" + " },\n" + "
{\n"
+ + " \"name\": \"bytesField\",\n" + " \"type\":
\"bytes\",\n"
+ + " \"doc\": \"This is a bytes field.\"\n" + " },\n" + "
{\n"
+ + " \"name\": \"stringField\",\n" + " \"type\":
\"string\",\n"
+ + " \"doc\": \"This is a string field.\"\n" + " }\n" + "
]\n" + "}\n";
+
+ private static final String AVRO_GEN_BASEDIR =
"target/generated_avro_files";
+ private static final String FILE_NAME = "avro_type.avro";
+
+ public static void writeExample() throws IOException {
+ Schema schema = new Schema.Parser().parse(SCHEMA_STRING);
+ File destPath = new File(AVRO_GEN_BASEDIR);
+ File outputFile = new File(destPath, FILE_NAME);
+
+ DatumWriter<GenericRecord> datumWriter = new
SpecificDatumWriter<>(schema);
+ try (DataFileWriter<GenericRecord> dataFileWriter = new
DataFileWriter<>(datumWriter)) {
+ dataFileWriter.create(schema, outputFile);
+
+ // Nested record shared by both example records
+ GenericRecord nestedRecord = new
GenericData.Record(schema.getField("nestedRecord").schema());
+ nestedRecord.put("nestedInt", 100);
+ nestedRecord.put("nestedString", "Inside Nested");
+
+ // First record: unionField holds an int
+ GenericRecord record = new GenericData.Record(schema);
+ record.put("unionField", 42);
+ Map<String, Integer> map = new HashMap<>();
+ map.put("key1", 1);
+ map.put("key2", 2);
+ record.put("mapField", map);
+ record.put("nestedRecord", nestedRecord);
+ record.put("booleanField", true);
+ record.put("intField", 32);
+ record.put("longField", 64L);
+ record.put("floatField", 1.0f);
+ record.put("doubleField", 2.0);
+ record.put("bytesField", ByteBuffer.wrap(new byte[] { 0x01, 0x02
}));
+ record.put("stringField", "Example string");
+ dataFileWriter.append(record);
+
+ // Second record: unionField holds a string
+ GenericRecord record2 = new GenericData.Record(schema);
+ record2.put("unionField", "Example string");
+ Map<String, Integer> map2 = new HashMap<>();
+ map2.put("key3", 3);
+ map2.put("key4", 4);
+ record2.put("mapField", map2);
+ record2.put("nestedRecord", nestedRecord);
+ record2.put("booleanField", false);
+ record2.put("intField", 54);
+ record2.put("longField", 60L);
+ record2.put("floatField", 3.6f);
+ record2.put("doubleField", 5.77777);
+ record2.put("bytesField", ByteBuffer.wrap(new byte[] { 0x06, 0x04
}));
+ record2.put("stringField", "Sample Values");
+ dataFileWriter.append(record2);
+ } catch (IOException e) {
+ System.err.println("Failed to write AVRO file: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+}
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 7892a177de..7912d57960 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.test.external_dataset.aws;
+import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createAvroFiles;
+import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createAvroFilesRecursively;
import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createBinaryFiles;
import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createBinaryFilesRecursively;
import static
org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setDataPaths;
@@ -194,6 +196,8 @@ public class AwsS3ExternalDatasetTest {
LangExecutionUtil.setUp(TEST_CONFIG_FILE_NAME, testExecutor);
createBinaryFiles(DEFAULT_PARQUET_SRC_PATH);
createBinaryFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
+ createAvroFiles(DEFAULT_PARQUET_SRC_PATH);
+ createAvroFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
setNcEndpoints(testExecutor);
startAwsS3MockServer();
}
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
index 9858e562d3..d35440e4ff 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java
@@ -127,6 +127,8 @@ public class AzureBlobStorageExternalDatasetTest {
final TestExecutor testExecutor = new AzureTestExecutor();
ExternalDatasetTestUtils.createBinaryFiles(PARQUET_RAW_DATA_PATH);
createBinaryFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
+ ExternalDatasetTestUtils.createAvroFiles(PARQUET_RAW_DATA_PATH);
+ createAvroFilesRecursively(EXTERNAL_FILTER_DATA_PATH);
LangExecutionUtil.setUp(TEST_CONFIG_FILE_NAME, testExecutor);
setNcEndpoints(testExecutor);
createBlobServiceClient();
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
new file mode 100644
index 0000000000..606c7818f4
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Array access pushdown
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*dummy_tweet.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.02.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.02.query.sqlpp
new file mode 100644
index 0000000000..f07bc3555b
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.02.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push down get-item
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT a.entities.urls[0].display_url
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.03.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.03.query.sqlpp
new file mode 100644
index 0000000000..e1abe7d450
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.03.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push down a.entities.urls
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT a.entities.urls[*].display_url
+FROM AvroDataset a
+WHERE a.entities.urls IS NOT MISSING
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.04.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.04.query.sqlpp
new file mode 100644
index 0000000000..4ebcca8cd9
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.04.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push down scan-collection from unnest
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+
+SELECT urls.display_url
+FROM AvroDataset a, a.entities.urls urls
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.05.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.05.query.sqlpp
new file mode 100644
index 0000000000..951039c388
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.05.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push down scan-collection from unnest in a subplan
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+
+SELECT VALUE COUNT(*)
+FROM AvroDataset a
+WHERE (EVERY ht in a.entities.urls SATISFIES ht.display_url = "string");
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.06.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.06.query.sqlpp
new file mode 100644
index 0000000000..87827c3840
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.06.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push down nested get_item
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+
+SELECT VALUE a.place.bounding_box.coordinates[0][0][0]
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
new file mode 100644
index 0000000000..65a2b38850
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Create an external Avro dataset over the *avro_type.avro files
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*avro_type.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.02.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.02.query.sqlpp
new file mode 100644
index 0000000000..61ed57e72a
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.02.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Select the whole mapField value (Avro map type)
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT VALUE a.mapField
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.03.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.03.query.sqlpp
new file mode 100644
index 0000000000..b97d9f85a7
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.03.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Select a single key (key1) from mapField (Avro map type)
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+SELECT RAW a.mapField.key1
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
new file mode 100644
index 0000000000..65a2b38850
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Create an external Avro dataset over the *avro_type.avro files
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*avro_type.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.query.sqlpp
new file mode 100644
index 0000000000..deef7c1a27
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Select the whole nestedRecord value (Avro nested record)
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT VALUE a.nestedRecord
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.query.sqlpp
new file mode 100644
index 0000000000..7e58df91ac
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Select a single field (nestedInt) inside nestedRecord
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT VALUE a.nestedRecord.nestedInt
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
new file mode 100644
index 0000000000..65a2b38850
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Create an external Avro dataset over the *avro_type.avro files
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*avro_type.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.query.sqlpp
new file mode 100644
index 0000000000..136107f146
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Select the booleanField value (Avro primitive types)
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT VALUE a.booleanField
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.query.sqlpp
new file mode 100644
index 0000000000..d238d38314
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Select intField, longField, floatField and doubleField (Avro primitive types)
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT a.intField,a.longField,a.floatField,a.doubleField
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
new file mode 100644
index 0000000000..65a2b38850
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Create an external Avro dataset over the *avro_type.avro files
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*avro_type.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.02.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.02.query.sqlpp
new file mode 100644
index 0000000000..48d4587cc6
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.02.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Select the unionField value (Avro union type)
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT VALUE a.unionField
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
new file mode 100644
index 0000000000..5e30b26b45
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Field access pushdown
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*id_age.avro"),
+ ("format" = "avro")
+);
+
+CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*id_age.avro"),
+ ("format" = "avro")
+);
+
+CREATE EXTERNAL DATASET AvroDataset3(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*id_name_comment.avro"),
+ ("format" = "avro")
+);
+
+CREATE EXTERNAL DATASET AvroDataset4(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*dummy_tweet.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.02.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.02.query.sqlpp
new file mode 100644
index 0000000000..9225aba865
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.02.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT VALUE a
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.03.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.03.query.sqlpp
new file mode 100644
index 0000000000..3f70bc0ce8
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.03.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Join two datasets, requesting all fields from one and only id from the other
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT a1, a2.id
+FROM AvroDataset a1, AvroDataset2 a2
+WHERE a1.id = a2.id
+ORDER BY a2.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.04.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.04.query.sqlpp
new file mode 100644
index 0000000000..dca1a16c94
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.04.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Access different fields when joining two datasets
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+
+SELECT a1.age, a2.name
+FROM AvroDataset a1, AvroDataset3 a2
+WHERE a1.id = a2.id
+ORDER BY a2.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.05.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.05.query.sqlpp
new file mode 100644
index 0000000000..33232b1873
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.05.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push down children of common fields access when requesting nested values
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+SELECT a.user.id, a.user.name
+FROM AvroDataset4 a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.06.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.06.query.sqlpp
new file mode 100644
index 0000000000..7b96d53cc2
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.06.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Test SELECT COUNT(*)
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+
+
+SELECT VALUE COUNT(*)
+FROM AvroDataset4 a;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
new file mode 100644
index 0000000000..ce5eb8a257
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Heterogeneous access DDL
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*heterogeneous*"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.2.query.sqlpp
new file mode 100644
index 0000000000..bd5b240984
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.2.query.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Accessing a heterogeneous value
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+USE test;
+
+
+SELECT VALUE (
+ CASE WHEN is_array(a.arrayOrObject) THEN
+ a.arrayOrObject[*].text
+ ELSE
+ a.arrayOrObject.text
+ END
+)
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
new file mode 100644
index 0000000000..a3a6d0dffa
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="json-data/reviews/single-line/json"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.001.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.001.query.sqlpp
new file mode 100644
index 0000000000..1334f7f265
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT VALUE COUNT(*)
+FROM AvroDataset a
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.999.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.999.ddl.sqlpp
new file mode 100644
index 0000000000..20dc6fde5e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.999.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
new file mode 100644
index 0000000000..c6ee97004d
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Test type validation for Avro
+* Expected Res : ASX3123: Type 'AvroType' contains declared fields, which is not supported for 'avro' format
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE TYPE AvroType as {
+ id: string,
+ text: string
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*id_age.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
new file mode 100644
index 0000000000..3c8e934a5a
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Requesting non-existing fields should not fail
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*dummy_tweet.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.2.query.sqlpp
new file mode 100644
index 0000000000..09fbb2dbdb
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.2.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Requesting non-existing fields should not fail
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+USE test;
+
+SELECT a.not_a_field1 IS MISSING as f1, a.user.not_a_field2 IS MISSING as f2
+FROM AvroDataset a
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
new file mode 100644
index 0000000000..67e38d2903
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Avro files with different schemas
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*id_age.avro"),
+ ("include#1"="*id_name.avro"),
+ ("format" = "avro")
+);
+
+CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*id_age.avro"),
+ ("include#1"="*id_age-string.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.2.query.sqlpp
new file mode 100644
index 0000000000..1df5ada2c6
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.2.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Avro files with different schemas
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+USE test;
+
+SELECT VALUE a
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.3.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.3.query.sqlpp
new file mode 100644
index 0000000000..a9b28f6dee
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.3.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Avro files with different schemas
+ with conflicting fields
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+SELECT VALUE a
+FROM AvroDataset2 a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.4.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.4.query.sqlpp
new file mode 100644
index 0000000000..e84e089673
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.4.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Avro files with different schemas
+ with conflicting fields
+ * Expected Res : Success
+ * Date : Feb 23rd 2024
+ */
+
+USE test;
+
+SELECT VALUE a.age
+FROM AvroDataset2 a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
new file mode 100644
index 0000000000..507d6fa4c7
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : No files
+* Expected Res : Warning: The provided external dataset configuration returned no files from the external source
+* Date : Feb 23rd 2024
+*/
+
+-- param max-warnings:json=1000
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="NOT_A_DEFINITION"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.2.query.sqlpp
new file mode 100644
index 0000000000..6677ed6485
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.2.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : No files
+* Expected Res : Warning: The provided external dataset configuration returned no files from the external source
+* Date : Feb 23rd 2024
+*/
+
+-- param max-warnings:json=1000
+
+USE test;
+
+SELECT VALUE COUNT(*)
+FROM AvroDataset a
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
new file mode 100644
index 0000000000..c7f127b3e6
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Concat two objects after pushdown
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*dummy_tweet.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.2.query.sqlpp
new file mode 100644
index 0000000000..801daa3042
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.2.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Concat two objects after pushdown
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+USE test;
+
+
+SELECT VALUE object_concat(a.coordinates, a.user).name
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
new file mode 100644
index 0000000000..6d77dab57e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve all fields from an Avro file
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+%template%,
+("container"="playground"),
+("definition"="avro-data/reviews"),
+("include"="*dummy_tweet.avro"),
+("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.2.query.sqlpp
new file mode 100644
index 0000000000..0d2018128e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.2.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve all fields from an Avro file
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+USE test;
+
+SELECT VALUE a
+FROM AvroDataset a
+ORDER BY a.id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
new file mode 100644
index 0000000000..052b1272ad
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve the number of texts in all tweets
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*dummy_tweet.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.2.query.sqlpp
new file mode 100644
index 0000000000..3728b230ae
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.2.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve the number of texts in all tweets
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+USE test;
+
+SELECT VALUE count(a.text)
+FROM AvroDataset a;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
new file mode 100644
index 0000000000..17433a5a8e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Test Standard UTF-8
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*id_name_comment.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.2.query.sqlpp
new file mode 100644
index 0000000000..7681ab3015
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.2.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Test Standard UTF-8
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+USE test;
+
+
+SELECT VALUE array_count(split(trim(a.comment),"𩸽"))
+FROM AvroDataset a
+WHERE contains(a.comment, "𩸽");
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
new file mode 100644
index 0000000000..c8d7d212fa
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Type mismatch DDL
+* Expected Res : Success
+* Date : Feb 23rd 2024
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE AvroType as {
+};
+
+CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="avro-data/reviews"),
+ ("include"="*dummy_tweet.avro"),
+ ("format" = "avro")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.2.query.sqlpp
new file mode 100644
index 0000000000..a3bb6e9440
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.2.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Accessing an array as an object
+* Expected Res : Warning
+* Date : Feb 23rd 2024
+*/
+
+-- param max-warnings:json=1000
+
+USE test;
+
+
+SELECT VALUE a.entities.urls.display_url IS MISSING
+FROM AvroDataset a
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.3.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.3.query.sqlpp
new file mode 100644
index 0000000000..79ee625f26
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.3.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Accessing an array as an object
+* Expected Res : Warning
+* Date : Feb 23rd 2024
+*/
+
+-- param max-warnings:json=1000
+
+USE test;
+
+SELECT VALUE a.place.bounding_box.coordinates[0][0].not_object IS MISSING
+FROM AvroDataset a;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.02.adm
new file mode 100644
index 0000000000..18f3275beb
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.02.adm
@@ -0,0 +1,2 @@
+{ "display_url": "string" }
+{ }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.03.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.03.adm
new file mode 100644
index 0000000000..695240b26c
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.03.adm
@@ -0,0 +1 @@
+{ "display_url": [ "string" ] }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.04.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.04.adm
new file mode 100644
index 0000000000..41c14f5712
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.04.adm
@@ -0,0 +1 @@
+{ "display_url": "string" }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.05.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.05.adm
new file mode 100644
index 0000000000..d8263ee986
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.05.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.06.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.06.adm
new file mode 100644
index 0000000000..15eecd22cf
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/array-access/array-access.06.adm
@@ -0,0 +1,2 @@
+1.1
+1.1
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm
new file mode 100644
index 0000000000..5560bf92f4
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.02.adm
@@ -0,0 +1,2 @@
+{ "key1": 1, "key2": 2 }
+{ "key3": 3, "key4": 4 }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm
new file mode 100644
index 0000000000..fe0b81feb1
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-map/avro-map.03.adm
@@ -0,0 +1,2 @@
+1
+null
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm
new file mode 100644
index 0000000000..db09d84496
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.02.adm
@@ -0,0 +1,2 @@
+{ "nestedInt": 100, "nestedString": "Inside Nested" }
+{ "nestedInt": 100, "nestedString": "Inside Nested" }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm
new file mode 100644
index 0000000000..4eb537c528
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.03.adm
@@ -0,0 +1,2 @@
+100
+100
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm
new file mode 100644
index 0000000000..d25232800f
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.02.adm
@@ -0,0 +1,2 @@
+true
+false
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm
new file mode 100644
index 0000000000..8a642e5efc
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.03.adm
@@ -0,0 +1,2 @@
+{ "intField": 32, "longField": 64, "floatField": 1.0, "doubleField": 2.0 }
+{ "intField": 54, "longField": 60, "floatField": 3.6, "doubleField": 5.77777 }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm
new file mode 100644
index 0000000000..15f87769f0
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/avro-types/avro-union/avro-union.02.adm
@@ -0,0 +1,2 @@
+42
+"Example string"
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.02.adm
new file mode 100644
index 0000000000..7e235c5d1e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.02.adm
@@ -0,0 +1,7 @@
+{ "id": 8, "age": 10 }
+{ "id": 9, "age": 20 }
+{ "id": 10, "age": 30 }
+{ "id": 11, "age": 40 }
+{ "id": 12, "age": 50 }
+{ "id": 13, "age": 60 }
+{ "id": 14, "age": 70 }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.03.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.03.adm
new file mode 100644
index 0000000000..aaefab95d3
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.03.adm
@@ -0,0 +1,7 @@
+{ "a1": { "id": 8, "age": 10 }, "id": 8 }
+{ "a1": { "id": 9, "age": 20 }, "id": 9 }
+{ "a1": { "id": 10, "age": 30 }, "id": 10 }
+{ "a1": { "id": 11, "age": 40 }, "id": 11 }
+{ "a1": { "id": 12, "age": 50 }, "id": 12 }
+{ "a1": { "id": 13, "age": 60 }, "id": 13 }
+{ "a1": { "id": 14, "age": 70 }, "id": 14 }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.04.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.04.adm
new file mode 100644
index 0000000000..aa3f80116c
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.04.adm
@@ -0,0 +1 @@
+{ "age": 10, "name": "William" }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.05.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.05.adm
new file mode 100644
index 0000000000..a1ad24e843
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.05.adm
@@ -0,0 +1,2 @@
+{ "id": 1, "name": "string" }
+{ "id": 1, "name": "string" }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.06.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.06.adm
new file mode 100644
index 0000000000..d8263ee986
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/field-access/field-access.06.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.02.adm
new file mode 100644
index 0000000000..1ebe99366f
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.02.adm
@@ -0,0 +1,6 @@
+[ "1", "2" ]
+[ "3", "4" ]
+[ "5", "6" ]
+"7"
+"8"
+"9"
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/missing-fields/missing-fields.2.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/missing-fields/missing-fields.2.adm
new file mode 100644
index 0000000000..8876910a54
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/missing-fields/missing-fields.2.adm
@@ -0,0 +1,2 @@
+{ "f1": true, "f2": true }
+{ "f1": true, "f2": true }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.2.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.2.adm
new file mode 100644
index 0000000000..0e2b98093e
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.2.adm
@@ -0,0 +1,14 @@
+{ "id": 1, "name": "John" }
+{ "id": 2, "name": "Abel" }
+{ "id": 3, "name": "Sandy" }
+{ "id": 4, "name": "Alex" }
+{ "id": 5, "name": "Mike" }
+{ "id": 6, "name": "Tom" }
+{ "id": 7, "name": "Jerry" }
+{ "id": 8, "age": 10 }
+{ "id": 9, "age": 20 }
+{ "id": 10, "age": 30 }
+{ "id": 11, "age": 40 }
+{ "id": 12, "age": 50 }
+{ "id": 13, "age": 60 }
+{ "id": 14, "age": 70 }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.3.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.3.adm
new file mode 100644
index 0000000000..8df138d502
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.3.adm
@@ -0,0 +1,14 @@
+{ "id": 8, "age": 10 }
+{ "id": 9, "age": 20 }
+{ "id": 10, "age": 30 }
+{ "id": 11, "age": 40 }
+{ "id": 12, "age": 50 }
+{ "id": 13, "age": 60 }
+{ "id": 14, "age": 70 }
+{ "id": 15, "age": "10" }
+{ "id": 16, "age": "20" }
+{ "id": 17, "age": "30" }
+{ "id": 18, "age": "40" }
+{ "id": 19, "age": "50" }
+{ "id": 20, "age": "60" }
+{ "id": 21, "age": "70" }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.4.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.4.adm
new file mode 100644
index 0000000000..9256ef597f
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.4.adm
@@ -0,0 +1,14 @@
+10
+20
+30
+40
+50
+60
+70
+"10"
+"20"
+"30"
+"40"
+"50"
+"60"
+"70"
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/no-files/no-files.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/no-files/no-files.02.adm
new file mode 100644
index 0000000000..c227083464
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/no-files/no-files.02.adm
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/object-concat/object-concat.2.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/object-concat/object-concat.2.adm
new file mode 100644
index 0000000000..1b425f720c
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/object-concat/object-concat.2.adm
@@ -0,0 +1,2 @@
+"string"
+"string"
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/select-all-fields/select-all-fields.2.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/select-all-fields/select-all-fields.2.adm
new file mode 100644
index 0000000000..53f2518eb9
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/select-all-fields/select-all-fields.2.adm
@@ -0,0 +1,2 @@
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at":
"string", "entities": { "urls": [ { "display_url": "string", "expanded_url":
"string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1,
"id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string"
} ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo":
{ "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str":
"string", "in_reply_to_scr [...]
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at":
"string", "favorite_count": 1, "favorited": true, "filter_level": "string",
"geo": { "coordinates": [ 1.1 ], "type": "string" }, "id":
"11111111111111111111", "id_str": "string", "in_reply_to_screen_name":
"string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string",
"in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string",
"is_quote_status": true, "lang": "string", "place": { "bounding_box": [...]
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/select-count-one-field/select-count-one-field.2.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/select-count-one-field/select-count-one-field.2.adm
new file mode 100644
index 0000000000..d8263ee986
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/select-count-one-field/select-count-one-field.2.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.2.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.2.adm
new file mode 100644
index 0000000000..6fb86be0d9
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.2.adm
@@ -0,0 +1,2 @@
+2
+301
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/type-mismatch/type-mismatch.02.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/type-mismatch/type-mismatch.02.adm
new file mode 100644
index 0000000000..0be5d980da
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/type-mismatch/type-mismatch.02.adm
@@ -0,0 +1,2 @@
+true
+true
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/type-mismatch/type-mismatch.03.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/type-mismatch/type-mismatch.03.adm
new file mode 100644
index 0000000000..0be5d980da
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/avro/type-mismatch/type-mismatch.03.adm
@@ -0,0 +1,2 @@
+true
+true
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index db4b5a8e8f..57474b3277 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -294,6 +294,111 @@
</compilation-unit>
</test-case>
<!-- Parquet Tests End -->
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-map">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/avro-types/avro-map</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-nested-records">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/avro-types/avro-nested-records</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-primitives">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/avro-types/avro-primitives</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-union">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/avro-types/avro-union</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/type-mismatch">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/type-mismatch</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/string-standard-utf8">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/string-standard-utf8</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/select-all-fields">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/select-all-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/select-count-one-field">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/select-count-one-field</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/array-access">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/array-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/field-access">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/field-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/heterogeneous-access">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/heterogeneous-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/invalid-avro-files">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">none</output-dir>
+ <source-location>false</source-location>
+ <expected-error>Not an Avro data file.</expected-error>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/invalid-type">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">none</output-dir>
+ <expected-error>ASX3123: Type 'AvroType' contains declared fields, which is not supported for 'avro' format</expected-error>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/missing-fields">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/missing-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/multi-file-multi-schema">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/multi-file-multi-schema</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/no-files">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/no-files</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/object-concat">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/avro/object-concat</output-dir>
+ </compilation-unit>
+ </test-case>
<!-- Dynamic prefixes tests start -->
<test-case FilePath="external-dataset/common/dynamic-prefixes">
<compilation-unit name="embed-with-closed-type">
diff --git
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index e5c137e63a..ef54ba821d 100644
---
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -412,6 +412,8 @@ public enum ErrorCode implements IError {
PARSER_DATA_PARSER_UNEXPECTED_TOKEN(3120),
REQUIRED_PARAM_OR_PARAM_IF_PARAM_IS_PRESENT(3121),
PARAM_NOT_ALLOWED_IF_PARAM_IS_PRESENT(3122),
+ // Avro error
+ UNSUPPORTED_TYPE_FOR_AVRO(3123),
// Lifecycle management errors
DUPLICATE_PARTITION_ID(4000),
diff --git
a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index b3eb3a3911..45b6185461 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -417,6 +417,7 @@
3120 = Unexpected token %s: was expecting %s
3121 = Parameter '%1$s' or '%2$s' is required if '%3$s' is provided
3122 = Parameter '%1$s' is not allowed if '%2$s' is provided
+3123 = Type '%1$s' contains declared fields, which is not supported for 'avro'
format
# Lifecycle management errors
4000 = Partition id %1$s for node %2$s already in use by node %3$s
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
index d1744ebe08..d760c1f60f 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AvroDataParser.java
@@ -18,10 +18,14 @@
*/
package org.apache.asterix.external.parser;
+import static org.apache.avro.Schema.Type.NULL;
+
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
+import java.util.List;
+import java.util.Map;
import org.apache.asterix.builders.IARecordBuilder;
import org.apache.asterix.builders.IAsterixListBuilder;
@@ -35,6 +39,7 @@ import org.apache.asterix.om.base.ABoolean;
import org.apache.asterix.om.base.ANull;
import org.apache.asterix.om.pointables.base.DefaultOpenFieldType;
import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IMutableValueStorage;
@@ -57,13 +62,15 @@ public class AvroDataParser extends AbstractDataParser
implements IRecordDataPar
}
private final void parseObject(GenericRecord record, DataOutput out)
throws IOException {
- Schema schema = record.getSchema();
IMutableValueStorage valueBuffer = parserContext.enterObject();
IARecordBuilder objectBuilder =
parserContext.getObjectBuilder(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
+ Schema schema = record.getSchema();
for (Schema.Field field : schema.getFields()) {
- valueBuffer.reset();
- parseValue(field.schema(), record.get(field.name()),
valueBuffer.getDataOutput());
-
objectBuilder.addField(parserContext.getSerializedFieldName(field.name()),
valueBuffer);
+ if (record.get(field.name()) != null) {
+ valueBuffer.reset();
+ parseValue(field.schema(), record.get(field.name()),
valueBuffer.getDataOutput());
+
objectBuilder.addField(parserContext.getSerializedFieldName(field.name()),
valueBuffer);
+ }
}
objectBuilder.write(out, true);
parserContext.exitObject(valueBuffer, null, objectBuilder);
@@ -83,6 +90,58 @@ public class AvroDataParser extends AbstractDataParser
implements IRecordDataPar
parserContext.exitCollection(valueBuffer, arrayBuilder);
}
+    /**
+     * Serializes an Avro map as a record built with the nested open record type: every map entry
+     * becomes one field, named by the entry key and holding the entry value parsed against the
+     * map's value schema.
+     *
+     * @param mapSchema Avro schema of the map; only its value type is consulted here
+     * @param map       entries to serialize
+     * @param out       destination the finished record is written to
+     * @throws IOException propagated from key/value serialization or from writing the record
+     */
+    private void parseMap(Schema mapSchema, Map<String, ?> map, DataOutput out) throws IOException {
+        final Schema entryValueSchema = mapSchema.getValueType();
+        final IMutableValueStorage valueBuffer = parserContext.enterCollection();
+        // NOTE(review): keyBuffer is obtained via enterCollection() but is not handed to any exit
+        // call in this method (only valueBuffer reaches exitObject) - confirm it is released elsewhere.
+        final IMutableValueStorage keyBuffer = parserContext.enterCollection();
+        final IARecordBuilder recordBuilder =
+                parserContext.getObjectBuilder(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
+        for (Map.Entry<String, ?> mapEntry : map.entrySet()) {
+            // Both scratch buffers are reused across entries, so clear them before each use.
+            keyBuffer.reset();
+            serializeString(mapEntry.getKey(), Schema.Type.STRING, keyBuffer.getDataOutput());
+            valueBuffer.reset();
+            parseValue(entryValueSchema, mapEntry.getValue(), valueBuffer.getDataOutput());
+            recordBuilder.addField(keyBuffer, valueBuffer);
+        }
+        recordBuilder.write(out, true);
+        parserContext.exitObject(valueBuffer, null, recordBuilder);
+    }
+
+    /**
+     * Parses a value whose schema is an Avro union by delegating to {@code parseValue} with the
+     * first non-NULL branch whose type {@link #matchesType(Object, Schema.Type)} accepts.
+     * <p>
+     * NOTE(review): if no branch matches, the method returns without writing anything to
+     * {@code out} - confirm that callers tolerate an empty value.
+     *
+     * @param unionSchema the union schema listing the candidate branch schemas
+     * @param value       the runtime value to serialize
+     * @param out         destination for the serialized value
+     * @throws IOException propagated from {@code parseValue}
+     */
+    private final void parseUnion(Schema unionSchema, Object value, DataOutput out) throws IOException {
+        for (Schema branch : unionSchema.getTypes()) {
+            Schema.Type branchType = branch.getType();
+            if (branchType == NULL || !matchesType(value, branchType)) {
+                continue;
+            }
+            parseValue(branch, value, out);
+            return;
+        }
+    }
+
+    /**
+     * Tells whether {@code value} is an instance of the Java class expected for the given Avro
+     * schema type, so that a union branch can be selected for it.
+     *
+     * @param value      the runtime value read from the Avro record
+     * @param schemaType the Avro type of the candidate union branch
+     * @return {@code true} if the value's class fits {@code schemaType}; {@code false} for a
+     *         mismatch and for any type not listed below
+     */
+    private boolean matchesType(Object value, Schema.Type schemaType) {
+        switch (schemaType) {
+            case INT:
+                return value instanceof Integer;
+            case STRING:
+                return value instanceof CharSequence;
+            case LONG:
+                return value instanceof Long;
+            case FLOAT:
+                return value instanceof Float;
+            case DOUBLE:
+                return value instanceof Double;
+            case BOOLEAN:
+                return value instanceof Boolean;
+            case BYTES:
+                // Avro's generic API represents bytes as a ByteBuffer; a boxed Byte never occurs,
+                // so testing for Byte made every bytes branch of a union unmatched.
+                return value instanceof ByteBuffer;
+            case RECORD:
+                return value instanceof GenericData.Record;
+            case ARRAY:
+                // parseValue casts ARRAY values to Collection, so accept exactly that here.
+                return value instanceof Collection;
+            case MAP:
+                // parseValue casts MAP values to Map, so accept exactly that here.
+                return value instanceof Map;
+            default:
+                return false;
+        }
+    }
+
private void parseValue(Schema schema, Object value, DataOutput out)
throws IOException {
Schema.Type type = schema.getType();
switch (type) {
@@ -92,8 +151,12 @@ public class AvroDataParser extends AbstractDataParser
implements IRecordDataPar
case ARRAY:
parseArray(schema, (Collection<?>) value, out);
break;
- case MAP:
case UNION:
+ parseUnion(schema, value, out);
+ break;
+ case MAP:
+ parseMap(schema, (Map<String, ?>) value, out);
+ break;
case ENUM:
case FIXED:
case NULL:
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 189e0d8e78..ae3b567f79 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -917,6 +917,19 @@ public class ExternalDataUtils {
||
ExternalDataConstants.FORMAT_PARQUET.equals(properties.get(ExternalDataConstants.KEY_FORMAT));
}
+ public static void validateAvroTypeAndConfiguration(Map<String, String>
properties, ARecordType datasetRecordType)
+ throws CompilationException {
+ if (isAvroFormat(properties)) {
+ if (datasetRecordType.getFieldTypes().length != 0) {
+ throw new
CompilationException(ErrorCode.UNSUPPORTED_TYPE_FOR_AVRO,
datasetRecordType.getTypeName());
+ }
+ }
+ }
+
+ public static boolean isAvroFormat(Map<String, String> properties) {
+ return
ExternalDataConstants.FORMAT_AVRO.equals(properties.get(ExternalDataConstants.KEY_FORMAT));
+ }
+
public static void
setExternalDataProjectionInfo(ExternalDatasetProjectionFiltrationInfo
projectionInfo,
Map<String, String> properties) throws IOException {
properties.put(ExternalDataConstants.KEY_REQUESTED_FIELDS,