This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new d687ba59ef [ASTERIXDB-3519][EXT]: Support reading avro files from HDFS
d687ba59ef is described below

commit d687ba59efeebae62fbe72a82257540444cf5134
Author: Savyasach Reddy <[email protected]>
AuthorDate: Mon Dec 2 14:30:29 2024 +0530

    [ASTERIXDB-3519][EXT]: Support reading avro files from HDFS
    
    - user model changes: support reading avro records
    - storage format changes: no
    - interface changes: no
    
    details:
    - support reading avro from hdfs
    
    Ext-ref: MB-63117
    Change-Id: I7da0b293479df04213c7301391c644c57665eda7
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19166
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
    Tested-by: Murtadha Hubail <[email protected]>
---
 .../avro/array-access/array-access.01.ddl.sqlpp    |   4 +-
 .../avro/avro-types/avro-map/avro-map.01.ddl.sqlpp |   4 +-
 .../avro-nested-records.01.ddl.sqlpp               |   4 +-
 .../avro-primitives/avro-primitives.01.ddl.sqlpp   |   4 +-
 .../avro-types/avro-union/avro-union.01.ddl.sqlpp  |   4 +-
 .../avro/field-access/field-access.01.ddl.sqlpp    |  16 +-
 .../heterogeneous-access.1.ddl.sqlpp               |   4 +-
 .../avro/invalid-avro-files/test.000.ddl.sqlpp     |   4 +-
 .../avro/invalid-type/invalid-type.1.ddl.sqlpp     |   4 +-
 .../avro/missing-fields/missing-fields.1.ddl.sqlpp |   4 +-
 .../multi-file-multi-schema.1.ddl.sqlpp            |   8 +-
 .../common/avro/no-files/no-files.1.ddl.sqlpp      |   2 +-
 .../avro/object-concat/object-concat.1.ddl.sqlpp   |   4 +-
 .../select-all-fields.1.ddl.sqlpp                  |   4 +-
 .../select-count-one-field.1.ddl.sqlpp             |   4 +-
 .../string-standard-utf8.1.ddl.sqlpp               |   4 +-
 .../avro/type-mismatch/type-mismatch.1.ddl.sqlpp   |   4 +-
 .../avro/embed-flat/embed-flat.000.ddl.sqlpp       |   4 +-
 .../embed-multiple-values.000.ddl.sqlpp            |   4 +-
 .../embed-one-value/embed-one-value.000.ddl.sqlpp  |   8 +-
 .../embed-flat.000.ddl.sqlpp                       |   4 +-
 .../avro/one-field/one-field.000.ddl.sqlpp         |   8 +-
 .../dynamic-prefixes/avro/query/test.000.ddl.sqlpp |  12 +-
 .../avro/using-limit/test.000.ddl.sqlpp            |   4 +-
 .../dynamic-prefixes/avro/views/test.000.ddl.sqlpp |   8 +-
 .../runtimets/testsuite_external_dataset_s3.xml    |  50 +++++
 .../resources/runtimets/testsuite_sqlpp_hdfs.xml   | 208 +++++++++++++++++++++
 asterixdb/asterix-external-data/pom.xml            |   5 +
 .../external/input/HDFSDataSourceFactory.java      |   7 +
 .../reader/hdfs/AbstractHDFSRecordReader.java      |  23 ++-
 .../input/record/reader/hdfs/HDFSRecordReader.java |   3 +-
 .../reader/hdfs/avro/AvroFileInputFormat.java      |  45 +++++
 .../reader/hdfs/avro/AvroFileRecordReader.java     | 156 ++++++++++++++++
 .../hdfs/parquet/ParquetFileRecordReader.java      |   3 +-
 .../external/util/ExternalDataConstants.java       |   6 +
 .../asterix/external/util/ExternalDataUtils.java   |   1 +
 .../apache/asterix/external/util/HDFSUtils.java    |   5 +
 37 files changed, 566 insertions(+), 80 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
index 606c7818f4..76ae4494c4 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*dummy_tweet.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*avro_type.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*avro_type.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*avro_type.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*avro_type.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
index 5e30b26b45..74b7f179a9 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*id_age.avro"),
   ("format" = "avro")
 );
@@ -43,8 +43,8 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*id_age.avro"),
   ("format" = "avro")
 );
@@ -52,8 +52,8 @@ CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
 CREATE EXTERNAL DATASET AvroDataset3(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*id_name_comment.avro"),
   ("format" = "avro")
 );
@@ -61,8 +61,8 @@ CREATE EXTERNAL DATASET AvroDataset3(AvroType) USING %adapter%
 CREATE EXTERNAL DATASET AvroDataset4(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*dummy_tweet.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
index ce5eb8a257..9445a2c05e 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*heterogeneous*"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
index a3a6d0dffa..ac678fb82d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
@@ -28,7 +28,7 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="json-data/reviews/single-line/json"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%json-data/reviews/single-line/json"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
index c6ee97004d..4f4e322634 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
@@ -35,8 +35,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*id_age.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
index 3c8e934a5a..74664c50d2 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*dummy_tweet.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
index 67e38d2903..92dca50b83 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*id_age.avro"),
   ("include#1"="*id_name.avro"),
   ("format" = "avro")
@@ -44,8 +44,8 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*id_age.avro"),
   ("include#1"="*id_age-string.avro"),
   ("format" = "avro")
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
index 507d6fa4c7..950bfba61e 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
@@ -36,7 +36,7 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
+  %additional_Properties%,
   ("definition"="NOT_A_DEFINITION"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
index c7f127b3e6..6f64c800f6 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*dummy_tweet.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
index 6d77dab57e..fb2d3db534 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
 %template%,
-("container"="playground"),
-("definition"="avro-data/reviews"),
+%additional_Properties%,
+("definition"="%path_prefix%avro-data/reviews"),
 ("include"="*dummy_tweet.avro"),
 ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
index 052b1272ad..285d19c998 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*dummy_tweet.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
index 17433a5a8e..d36da08ae8 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*id_name_comment.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
index c8d7d212fa..d6d9dec57d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
 CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
 (
   %template%,
-  ("container"="playground"),
-  ("definition"="avro-data/reviews"),
+  %additional_Properties%,
+  ("definition"="%path_prefix%avro-data/reviews"),
   ("include"="*dummy_tweet.avro"),
   ("format" = "avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
index 654a84aa1f..dd5c56d1c2 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
@@ -30,8 +30,8 @@ CREATE TYPE TestType AS {
 
 CREATE EXTERNAL DATASET Name(TestType) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/embed/name/{name:string}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/embed/name/{name:string}"),
     ("embed-filter-values" = "true"),
     ("format"="avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
index be3d97fe74..b835823ce4 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
@@ -26,8 +26,8 @@ CREATE TYPE test AS {
 
 CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
     ("embed-filter-values" = "true"),
     ("format"="avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
index b1b1b76045..c947f707ce 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
@@ -27,16 +27,16 @@ CREATE TYPE TestType AS {
 
 CREATE EXTERNAL DATASET Department(TestType) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/embed/department/{department:string}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/embed/department/{department:string}"),
     ("embed-filter-values" = "true"),
     ("format"="avro")
 );
 
 CREATE EXTERNAL DATASET LastName(TestType) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/embed/last-name/{name.last:string}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/embed/last-name/{name.last:string}"),
     ("embed-filter-values" = "true"),
     ("format"="avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
index 2c5b352c8b..531e3981a3 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
@@ -30,8 +30,8 @@ CREATE TYPE TestType AS {
 
 CREATE EXTERNAL DATASET Name(TestType) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/embed/nested-value/{name:string}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/embed/nested-value/{name:string}"),
     ("embed-filter-values" = "true"),
     ("format"="avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
index 5fcfff5f30..173d1d0978 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
@@ -27,16 +27,16 @@ CREATE TYPE TestType AS {
 
 CREATE EXTERNAL DATASET Department(TestType) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/department/{department:string}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/department/{department:string}"),
     ("embed-filter-values" = "false"),
     ("format"="avro")
 );
 
 CREATE EXTERNAL DATASET LastName(TestType) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/last-name/{name.last:string}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/last-name/{name.last:string}"),
     ("embed-filter-values" = "false"),
     ("format"="avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
index 4c418b480e..49cc3e6de8 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
@@ -27,24 +27,24 @@ CREATE TYPE test AS {
 
 CREATE EXTERNAL DATASET company(test) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/car/{company:string}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}"),
     ("embed-filter-values" = "false"),
     ("format"="avro")
 );
 
 CREATE EXTERNAL DATASET customer(test) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}"),
     ("embed-filter-values" = "false"),
     ("format"="avro")
 );
 
 CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
     ("embed-filter-values" = "false"),
     ("format"="avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
index 13316e854e..073965b66c 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
@@ -26,8 +26,8 @@ CREATE TYPE test AS {
 
 CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
     ("embed-filter-values" = "false"),
     ("format"="avro")
 );
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
index 3c2b513116..495acc88ff 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
@@ -26,8 +26,8 @@ CREATE TYPE test AS {
 
 CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
     ("embed-filter-values" = "true"),
     ("format"="avro")
 );
@@ -39,8 +39,8 @@ SELECT customer_id, year, month, day, company
 
 CREATE EXTERNAL DATASET maintenance2(test) USING %adapter% (
     %template%,
-    ("container"="playground"),
-    ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customerId:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+    %additional_Properties%,
+    ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customerId:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
     ("embed-filter-values" = "true"),
     ("format"="avro")
 );
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index c8a6785a20..ff1b325c9f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -558,72 +558,96 @@
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/avro-types/avro-map">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/avro-types/avro-map</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/avro-types/avro-nested-records">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir 
compare="Text">common/avro/avro-types/avro-nested-records</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/avro-types/avro-primitives">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir 
compare="Text">common/avro/avro-types/avro-primitives</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/avro-types/avro-union">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir 
compare="Text">common/avro/avro-types/avro-union</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/type-mismatch">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/type-mismatch</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/string-standard-utf8">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir 
compare="Text">common/avro/string-standard-utf8</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/select-all-fields">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/select-all-fields</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/select-count-one-field">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir 
compare="Text">common/avro/select-count-one-field</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/array-access">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/array-access</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/field-access">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/field-access</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/heterogeneous-access">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir 
compare="Text">common/avro/heterogeneous-access</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/invalid-avro-files">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">none</output-dir>
         <expected-error>External source error. org.apache.avro.InvalidAvroMagicException: Not an Avro data file.</expected-error>
         <source-location>false</source-location>
@@ -632,6 +656,8 @@
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/invalid-type">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">none</output-dir>
         <expected-error>ASX3123: Type 'AvroType' contains declared fields, which is not supported for 'avro' format</expected-error>
       </compilation-unit>
@@ -639,24 +665,32 @@
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/missing-fields">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/missing-fields</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/multi-file-multi-schema">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir 
compare="Text">common/avro/multi-file-multi-schema</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/no-files">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/no-files</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/avro/object-concat">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">common/avro/object-concat</output-dir>
       </compilation-unit>
     </test-case>
@@ -891,48 +925,64 @@
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="one-field">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">one-field</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="query">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">query</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="embed-one-value">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">embed-one-value</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="embed-multiple-values">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">embed-multiple-values</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="embed-flat">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">embed-flat</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="embed-with-nested-values">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">embed-with-nested-values</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="using-limit">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">using-limit</output-dir>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
       <compilation-unit name="views">
         <placeholder name="adapter" value="S3" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additional_Properties" value='("container" = 
"playground")' />
         <output-dir compare="Text">views</output-dir>
       </compilation-unit>
     </test-case>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
index 6851433fca..b178efd99f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
@@ -359,4 +359,212 @@
     <!--      </compilation-unit>-->
     <!--    </test-case>-->
   </test-group>
+  <test-group name="avro">
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/avro-types/avro-map">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/avro-types/avro-map</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/avro-types/avro-nested-records">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir 
compare="Text">common/avro/avro-types/avro-nested-records</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/avro-types/avro-primitives">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir 
compare="Text">common/avro/avro-types/avro-primitives</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/avro-types/avro-union">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir 
compare="Text">common/avro/avro-types/avro-union</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/type-mismatch">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/type-mismatch</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/string-standard-utf8">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir 
compare="Text">common/avro/string-standard-utf8</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/select-all-fields">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/select-all-fields</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/select-count-one-field">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir 
compare="Text">common/avro/select-count-one-field</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/array-access">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/array-access</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/field-access">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/field-access</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/heterogeneous-access">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir 
compare="Text">common/avro/heterogeneous-access</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/invalid-avro-files">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">none</output-dir>
+        <expected-error>External source error. org.apache.avro.InvalidAvroMagicException: Not an Avro data file</expected-error>
+        <source-location>false</source-location>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/invalid-type">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">none</output-dir>
+        <expected-error>ASX3123: Type 'AvroType' contains declared fields, which is not supported for 'avro' format</expected-error>
+        <source-location>false</source-location>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/missing-fields">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/missing-fields</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/multi-file-multi-schema">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir 
compare="Text">common/avro/multi-file-multi-schema</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/no-files">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/no-files</output-dir>
+        <expected-error>ASX1114: The provided external dataset configuration returned no files from the external source</expected-error>
+        <source-location>false</source-location>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/avro/object-concat">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">common/avro/object-concat</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="one-field">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">one-field</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="query">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">query</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="embed-one-value">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">embed-one-value</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="embed-multiple-values">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">embed-multiple-values</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="embed-flat">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">embed-flat</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="embed-with-nested-values">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">embed-with-nested-values</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="using-limit">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">using-limit</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+      <compilation-unit name="views">
+        <placeholder name="adapter" value="hdfs" />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"avro-input-format")' />
+        <output-dir compare="Text">views</output-dir>
+      </compilation-unit>
+    </test-case>
+  </test-group>
 </test-suite>
diff --git a/asterixdb/asterix-external-data/pom.xml b/asterixdb/asterix-external-data/pom.xml
index 27c3ac1c38..e1cdd3a880 100644
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@ -598,6 +598,11 @@
       <artifactId>delta-standalone_2.12</artifactId>
       <version>3.0.0</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro-mapred</artifactId>
+      <version>1.12.0</version>
+    </dependency>
   </dependencies>
   <!-- apply patch for HADOOP-17225 to workaround CVE-2019-10172 -->
   <repositories>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
index 934ba1dfab..8af73428bb 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -45,6 +45,7 @@ import org.apache.asterix.external.api.IRecordReaderFactory;
 import org.apache.asterix.external.input.filter.embedder.IExternalFilterValueEmbedder;
 import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory;
 import org.apache.asterix.external.input.record.reader.hdfs.HDFSRecordReader;
+import org.apache.asterix.external.input.record.reader.hdfs.avro.AvroFileRecordReader;
 import org.apache.asterix.external.input.record.reader.hdfs.parquet.ParquetFileRecordReader;
 import org.apache.asterix.external.input.record.reader.stream.StreamRecordReader;
 import org.apache.asterix.external.input.stream.HDFSInputStream;
@@ -54,6 +55,7 @@ import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataPrefix;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.asterix.external.util.HDFSUtils;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
@@ -197,6 +199,8 @@ public class HDFSDataSourceFactory implements IRecordReaderFactory<Object>, IExt
                 reader.close();
             } else if (formatString.equals(ExternalDataConstants.FORMAT_PARQUET)) {
                 recordClass = IValueReference.class;
+            } else if (formatString.equals(ExternalDataConstants.FORMAT_AVRO)) {
+                recordClass = GenericRecord.class;
             } else {
                 recordReaderClazz = StreamRecordReaderProvider.getRecordReaderClazz(configuration);
                 this.recordClass = char[].class;
@@ -356,6 +360,9 @@ public class HDFSDataSourceFactory implements IRecordReaderFactory<Object>, IExt
         if (configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim()
                 .equals(ExternalDataConstants.INPUT_FORMAT_PARQUET)) {
             return new ParquetFileRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, context, ugi);
+        } else if (configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim()
+                .equals(ExternalDataConstants.INPUT_FORMAT_AVRO)) {
+            return new AvroFileRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, context, ugi);
         } else {
             return new HDFSRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, ugi);
         }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
index 04c80d1d72..352f118979 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
@@ -86,14 +86,11 @@ public abstract class AbstractHDFSRecordReader<K, V> implements IRecordReader<V>
             nextInputSplit();
         }
 
-        if (reader.next(key, value)) {
-            return true;
-        }
-        while (nextInputSplit()) {
-            if (reader.next(key, value)) {
+        do {
+            if (readerHasNext()) {
                 return true;
             }
-        }
+        } while (nextInputSplit());
         return false;
     }
 
@@ -103,6 +100,10 @@ public abstract class AbstractHDFSRecordReader<K, V> implements IRecordReader<V>
         return record;
     }
 
+    protected boolean readerHasNext() throws IOException {
+        return reader.next(key, value);
+    }
+
     private boolean nextInputSplit() throws IOException {
         for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
             /**
@@ -128,14 +129,18 @@ public abstract class AbstractHDFSRecordReader<K, V> implements IRecordReader<V>
                     continue;
                 }
 
-                reader.close();
-                reader = getRecordReader(currentSplitIndex);
+                closeRecordReader();
+                setRecordReader(currentSplitIndex);
                 return true;
             }
         }
         return false;
     }
 
+    protected void closeRecordReader() throws IOException {
+        reader.close();
+    }
+
     /**
      * Returns true if need to go to next split without closing the current reader
      *
@@ -143,7 +148,7 @@ public abstract class AbstractHDFSRecordReader<K, V> implements IRecordReader<V>
      */
     protected abstract boolean onNextInputSplit() throws IOException;
 
-    protected abstract RecordReader<K, V> getRecordReader(int splitIndex) throws IOException;
+    protected abstract void setRecordReader(int splitIndex) throws IOException;
 
     @Override
     public boolean stop() {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
index 6f338c42d6..f143188fcb 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
@@ -37,7 +37,7 @@ public class HDFSRecordReader<K, V extends Writable> extends AbstractHDFSRecordR
     }
 
     @Override
-    protected RecordReader<K, V> getRecordReader(int splitIndex) throws IOException {
+    protected void setRecordReader(int splitIndex) throws IOException {
         try {
             reader = ugi == null ? getReader(splitIndex)
                     : ugi.doAs((PrivilegedExceptionAction<RecordReader<K, V>>) () -> getReader(splitIndex));
@@ -48,7 +48,6 @@ public class HDFSRecordReader<K, V extends Writable> extends AbstractHDFSRecordR
             key = reader.createKey();
             value = reader.createValue();
         }
-        return reader;
     }
 
     @SuppressWarnings("unchecked")
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
new file mode 100644
index 0000000000..3c279a4ea1
--- /dev/null
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.hdfs.avro;
+
+import org.apache.avro.mapred.AvroInputFormat;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.SplittableCompressionCodec;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+
+/**
+ * Avro input format whose splittability depends on the compression codec that the job's
+ * {@link CompressionCodecFactory} resolves for each input file.
+ */
+public class AvroFileInputFormat<T> extends AvroInputFormat<T> implements JobConfigurable {
+    // Created in configure(JobConf); isSplitable() dereferences it, so configure() must run first.
+    private CompressionCodecFactory compressionCodecs = null;
+
+    /**
+     * Builds the codec factory from the job configuration.
+     *
+     * @param conf the job configuration the codec factory is created from
+     */
+    @Override
+    public void configure(JobConf conf) {
+        compressionCodecs = new CompressionCodecFactory(conf);
+    }
+
+    /**
+     * A file is splittable when no codec is resolved for it, or when the resolved codec is a
+     * {@link SplittableCompressionCodec}.
+     */
+    @Override
+    public boolean isSplitable(FileSystem fs, Path file) {
+        final CompressionCodec resolvedCodec = compressionCodecs.getCodec(file);
+        return resolvedCodec == null || resolvedCodec instanceof SplittableCompressionCodec;
+    }
+}
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
new file mode 100644
index 0000000000..e5056663bb
--- /dev/null
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.hdfs.avro;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.PrivilegedExceptionAction;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
+import org.apache.asterix.external.api.IExternalDataRuntimeContext;
+import org.apache.asterix.external.api.IRawRecord;
+import 
org.apache.asterix.external.input.filter.embedder.IExternalFilterValueEmbedder;
+import org.apache.asterix.external.input.record.GenericRecord;
+import 
org.apache.asterix.external.input.record.reader.hdfs.AbstractHDFSRecordReader;
+import org.apache.asterix.external.input.record.reader.hdfs.EmptyRecordReader;
+import org.apache.avro.InvalidAvroMagicException;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.util.ExceptionUtils;
+
+/**
+ * Record reader for Avro files read through the HDFS adapter.
+ * <p>
+ * Files whose path ends with {@code .gz} or {@code .gzip} (case-insensitive) are opened directly from the
+ * file system, decompressed with {@link GZIPInputStream} and decoded with a {@link DataFileStream}. All other
+ * splits are read through the record reader supplied by the configured input format.
+ */
+public class AvroFileRecordReader<T> extends AbstractHDFSRecordReader<Void, T> {
+
+    private AvroWrapper<T> key;
+    private NullWritable value;
+    private final IRawRecord<T> record;
+    private RecordReader<AvroWrapper<T>, NullWritable> reader;
+    private final IExternalFilterValueEmbedder valueEmbedder;
+    // True while the current split is served by dataFileStream rather than by reader.
+    private boolean isCompressed = false;
+    // Resources of the gzip path; each is null whenever it is not open.
+    private DataFileStream<T> dataFileStream;
+    private FileSystem fs;
+    private InputStream in;
+
+    public AvroFileRecordReader(boolean[] read, InputSplit[] inputSplits, String[] readSchedule, String nodeName,
+            JobConf conf, IExternalDataRuntimeContext context, UserGroupInformation ugi) {
+        super(read, inputSplits, readSchedule, nodeName, conf, ugi);
+        reader = new EmptyRecordReader<>();
+        record = new GenericRecord<>();
+        valueEmbedder = context.getValueEmbedder();
+    }
+
+    @Override
+    protected boolean onNextInputSplit() {
+        return false;
+    }
+
+    /**
+     * Opens the reader for the given split, choosing the gzip stream path or the input-format path based on
+     * the file extension.
+     *
+     * @param splitIndex index into {@code inputSplits} of the split to open
+     * @throws IOException if the split cannot be opened, if the calling thread is interrupted, or if the file
+     *         does not start with the Avro magic bytes
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    protected void setRecordReader(int splitIndex) throws IOException {
+        try {
+            String filePath = getPath(inputSplits[splitIndex]);
+            valueEmbedder.setPath(filePath);
+            if (isGzipPath(filePath)) {
+                // Flag first so that a failure while opening is still cleaned up by the gzip close path.
+                isCompressed = true;
+                openCompressedStream(filePath);
+            } else {
+                isCompressed = false;
+                reader = (RecordReader<AvroWrapper<T>, NullWritable>) (ugi == null
+                        ? inputFormat.getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL)
+                        : ugi.doAs((PrivilegedExceptionAction<?>) () -> inputFormat
+                                .getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL)));
+                if (key == null) {
+                    key = reader.createKey();
+                    value = reader.createValue();
+                }
+            }
+        } catch (InterruptedException ex) {
+            // Restore the interrupt flag so callers further up the stack can still observe the interruption.
+            Thread.currentThread().interrupt();
+            throw HyracksDataException.create(ex);
+        } catch (InvalidAvroMagicException ex) {
+            throw RuntimeDataException.create(ErrorCode.EXTERNAL_SOURCE_ERROR, ex,
+                    ExceptionUtils.getMessageOrToString(ex));
+        }
+    }
+
+    /**
+     * Closes whatever is serving the current split before the next split is opened.
+     */
+    @Override
+    protected void closeRecordReader() throws IOException {
+        releaseCurrentReader();
+    }
+
+    /**
+     * Closes whatever is serving the current split. It is safe to call when opening a gzip split failed
+     * part-way through.
+     */
+    @Override
+    public void close() throws IOException {
+        releaseCurrentReader();
+    }
+
+    @Override
+    protected boolean readerHasNext() throws IOException {
+        if (isCompressed) {
+            return dataFileStream.hasNext();
+        } else {
+            return reader.next(key, value);
+        }
+    }
+
+    @Override
+    public IRawRecord<T> next() throws IOException {
+        if (isCompressed) {
+            record.set(dataFileStream.next());
+        } else {
+            record.set(key.datum());
+        }
+        return record;
+    }
+
+    /** Returns true when the path carries a gzip extension ({@code .gz} or {@code .gzip}, case-insensitive). */
+    private static boolean isGzipPath(String filePath) {
+        return StringUtils.endsWithIgnoreCase(filePath, ".gz") || StringUtils.endsWithIgnoreCase(filePath, ".gzip");
+    }
+
+    /**
+     * Opens the file system, the gzip stream and the Avro data stream for a gzip-compressed file. The raw
+     * file stream is closed again if wrapping it fails, so it does not leak.
+     */
+    private void openCompressedStream(String filePath) throws IOException, InterruptedException {
+        fs = ugi == null ? FileSystem.get(conf)
+                : ugi.doAs((PrivilegedExceptionAction<FileSystem>) () -> FileSystem.get(conf));
+        InputStream rawStream = fs.open(new Path(filePath));
+        try {
+            InputStream gzipStream = new GZIPInputStream(rawStream);
+            GenericDatumReader<T> datumReader = new GenericDatumReader<>();
+            dataFileStream = new DataFileStream<>(gzipStream, datumReader);
+            in = gzipStream;
+        } catch (IOException | RuntimeException ex) {
+            try {
+                rawStream.close();
+            } catch (IOException closeEx) {
+                ex.addSuppressed(closeEx);
+            }
+            throw ex;
+        }
+    }
+
+    /**
+     * Releases the resources of the current split. On the gzip path every resource is closed even if closing
+     * an earlier one throws, and each field is reset to null so that a repeated call is a no-op.
+     */
+    private void releaseCurrentReader() throws IOException {
+        if (!isCompressed) {
+            reader.close();
+            return;
+        }
+        try {
+            if (dataFileStream != null) {
+                dataFileStream.close();
+            }
+        } finally {
+            dataFileStream = null;
+            try {
+                if (in != null) {
+                    in.close();
+                }
+            } finally {
+                in = null;
+                // NOTE(review): closing fs presumes this instance is not shared through the Hadoop
+                // FileSystem cache - confirm the cache is disabled for this reader's configuration.
+                try {
+                    if (fs != null) {
+                        fs.close();
+                    }
+                } finally {
+                    fs = null;
+                }
+            }
+        }
+    }
+
+    private String getPath(InputSplit split) {
+        if (split instanceof FileSplit) {
+            return ((FileSplit) split).getPath().toString();
+        } else {
+            return split.toString();
+        }
+    }
+}
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
index c11885a18d..1d123b61d1 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
@@ -62,7 +62,7 @@ public class ParquetFileRecordReader<V extends 
IValueReference> extends Abstract
 
     @SuppressWarnings("unchecked")
     @Override
-    protected RecordReader<Void, V> getRecordReader(int splitIndex) throws 
IOException {
+    protected void setRecordReader(int splitIndex) throws IOException {
         try {
             ParquetRecordReaderWrapper readerWrapper = ugi == null ? 
getReader(splitIndex)
                     : 
ugi.doAs((PrivilegedExceptionAction<ParquetRecordReaderWrapper>) () -> 
getReader(splitIndex));
@@ -75,7 +75,6 @@ public class ParquetFileRecordReader<V extends 
IValueReference> extends Abstract
         if (value == null) {
             value = reader.createValue();
         }
-        return reader;
     }
 
     private ParquetRecordReaderWrapper getReader(int splitIndex) throws 
IOException {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index d487e6868f..46a1b5b7a4 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -24,6 +24,7 @@ import java.util.function.LongSupplier;
 import java.util.function.Supplier;
 import java.util.regex.Pattern;
 
+import 
org.apache.asterix.external.input.record.reader.hdfs.avro.AvroFileInputFormat;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.hyracks.util.StorageUtil;
 
@@ -176,16 +177,21 @@ public class ExternalDataConstants {
     public static final String CLASS_NAME_SEQUENCE_INPUT_FORMAT = 
"org.apache.hadoop.mapred.SequenceFileInputFormat";
     public static final String CLASS_NAME_PARQUET_INPUT_FORMAT =
             
"org.apache.asterix.external.input.record.reader.hdfs.parquet.MapredParquetInputFormat";
+    public static final String CLASS_NAME_AVRO_INPUT_FORMAT = 
AvroFileInputFormat.class.getName();
     public static final String CLASS_NAME_HDFS_FILESYSTEM = 
"org.apache.hadoop.hdfs.DistributedFileSystem";
     public static final String S3A_CHANGE_DETECTION_REQUIRED = 
"requireVersionChangeDetection";
     public static final String S3A_CHANGE_DETECTION_REQUIRED_CONFIG_KEY = 
"fs.s3a.change.detection.version.required";
     public static final String HDFS_IO_COMPRESSION_CODECS_KEY = 
"io.compression.codecs";
+    public static final String HDFS_AVRO_IGNORE_INPUTS_WITHOUT_EXTENSION =
+            "avro.mapred.ignore.inputs.without.extension";
+
     /**
      * input formats aliases
      */
     public static final String INPUT_FORMAT_TEXT = "text-input-format";
     public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
     public static final String INPUT_FORMAT_PARQUET = "parquet-input-format";
+    public static final String INPUT_FORMAT_AVRO = "avro-input-format";
 
     public static final String HDFS_BLOCKSIZE = "blocksize";
     public static final String HDFS_REPLICATION = "replication";
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 540de09b8e..c7deb7c762 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -1139,6 +1139,7 @@ public class ExternalDataUtils {
                 protocol = nodePathPair[0];
                 break;
             case ExternalDataConstants.KEY_ADAPTER_NAME_HDFS:
+                // Remove trailing slashes: prefixes/paths in HDFS are absolute and already start with a slash
                 return 
configurations.get(ExternalDataConstants.KEY_HDFS_URL).replaceAll("/+$", "");
             default:
                 return "";
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index 35f2a9453f..7c7e031768 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -203,6 +203,8 @@ public class HDFSUtils {
                 return ExternalDataConstants.CLASS_NAME_SEQUENCE_INPUT_FORMAT;
             case ExternalDataConstants.INPUT_FORMAT_PARQUET:
                 return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT;
+            case ExternalDataConstants.INPUT_FORMAT_AVRO:
+                return ExternalDataConstants.CLASS_NAME_AVRO_INPUT_FORMAT;
             default:
                 return inputFormatParameter;
         }
@@ -252,6 +254,9 @@ public class HDFSUtils {
 
         if 
(ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT.equals(formatClassName)) 
{
             configureParquet(configuration, conf);
+        } else if 
(ExternalDataConstants.CLASS_NAME_AVRO_INPUT_FORMAT.equals(formatClassName)) {
+            
conf.set(ExternalDataConstants.HDFS_AVRO_IGNORE_INPUTS_WITHOUT_EXTENSION, 
ExternalDataConstants.FALSE);
+            disableHadoopFileSystemCache(conf, 
ExternalDataConstants.READER_HDFS);
         }
 
         if 
(configuration.containsKey(ExternalDataConstants.S3A_CHANGE_DETECTION_REQUIRED))
 {

Reply via email to