This is an automated email from the ASF dual-hosted git repository.
mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new d687ba59ef [ASTERIXDB-3519][EXT]: Support reading avro files from HDFS
d687ba59ef is described below
commit d687ba59efeebae62fbe72a82257540444cf5134
Author: Savyasach Reddy <[email protected]>
AuthorDate: Mon Dec 2 14:30:29 2024 +0530
[ASTERIXDB-3519][EXT]: Support reading avro files from HDFS
- user model changes: support reading avro records
- storage format changes: no
- interface changes: no
details:
- support reading avro from hdfs
Ext-ref: MB-63117
Change-Id: I7da0b293479df04213c7301391c644c57665eda7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19166
Integration-Tests: Jenkins <[email protected]>
Reviewed-by: Murtadha Hubail <[email protected]>
Tested-by: Murtadha Hubail <[email protected]>
---
.../avro/array-access/array-access.01.ddl.sqlpp | 4 +-
.../avro/avro-types/avro-map/avro-map.01.ddl.sqlpp | 4 +-
.../avro-nested-records.01.ddl.sqlpp | 4 +-
.../avro-primitives/avro-primitives.01.ddl.sqlpp | 4 +-
.../avro-types/avro-union/avro-union.01.ddl.sqlpp | 4 +-
.../avro/field-access/field-access.01.ddl.sqlpp | 16 +-
.../heterogeneous-access.1.ddl.sqlpp | 4 +-
.../avro/invalid-avro-files/test.000.ddl.sqlpp | 4 +-
.../avro/invalid-type/invalid-type.1.ddl.sqlpp | 4 +-
.../avro/missing-fields/missing-fields.1.ddl.sqlpp | 4 +-
.../multi-file-multi-schema.1.ddl.sqlpp | 8 +-
.../common/avro/no-files/no-files.1.ddl.sqlpp | 2 +-
.../avro/object-concat/object-concat.1.ddl.sqlpp | 4 +-
.../select-all-fields.1.ddl.sqlpp | 4 +-
.../select-count-one-field.1.ddl.sqlpp | 4 +-
.../string-standard-utf8.1.ddl.sqlpp | 4 +-
.../avro/type-mismatch/type-mismatch.1.ddl.sqlpp | 4 +-
.../avro/embed-flat/embed-flat.000.ddl.sqlpp | 4 +-
.../embed-multiple-values.000.ddl.sqlpp | 4 +-
.../embed-one-value/embed-one-value.000.ddl.sqlpp | 8 +-
.../embed-flat.000.ddl.sqlpp | 4 +-
.../avro/one-field/one-field.000.ddl.sqlpp | 8 +-
.../dynamic-prefixes/avro/query/test.000.ddl.sqlpp | 12 +-
.../avro/using-limit/test.000.ddl.sqlpp | 4 +-
.../dynamic-prefixes/avro/views/test.000.ddl.sqlpp | 8 +-
.../runtimets/testsuite_external_dataset_s3.xml | 50 +++++
.../resources/runtimets/testsuite_sqlpp_hdfs.xml | 208 +++++++++++++++++++++
asterixdb/asterix-external-data/pom.xml | 5 +
.../external/input/HDFSDataSourceFactory.java | 7 +
.../reader/hdfs/AbstractHDFSRecordReader.java | 23 ++-
.../input/record/reader/hdfs/HDFSRecordReader.java | 3 +-
.../reader/hdfs/avro/AvroFileInputFormat.java | 45 +++++
.../reader/hdfs/avro/AvroFileRecordReader.java | 156 ++++++++++++++++
.../hdfs/parquet/ParquetFileRecordReader.java | 3 +-
.../external/util/ExternalDataConstants.java | 6 +
.../asterix/external/util/ExternalDataUtils.java | 1 +
.../apache/asterix/external/util/HDFSUtils.java | 5 +
37 files changed, 566 insertions(+), 80 deletions(-)
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
index 606c7818f4..76ae4494c4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
index 65a2b38850..ca20804bac 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
index 5e30b26b45..74b7f179a9 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("format" = "avro")
);
@@ -43,8 +43,8 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("format" = "avro")
);
@@ -52,8 +52,8 @@ CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
CREATE EXTERNAL DATASET AvroDataset3(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_name_comment.avro"),
("format" = "avro")
);
@@ -61,8 +61,8 @@ CREATE EXTERNAL DATASET AvroDataset3(AvroType) USING %adapter%
CREATE EXTERNAL DATASET AvroDataset4(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
index ce5eb8a257..9445a2c05e 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*heterogeneous*"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
index a3a6d0dffa..ac678fb82d 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
@@ -28,7 +28,7 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="json-data/reviews/single-line/json"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%json-data/reviews/single-line/json"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
index c6ee97004d..4f4e322634 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
@@ -35,8 +35,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
index 3c8e934a5a..74664c50d2 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
index 67e38d2903..92dca50b83 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("include#1"="*id_name.avro"),
("format" = "avro")
@@ -44,8 +44,8 @@ CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("include#1"="*id_age-string.avro"),
("format" = "avro")
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
index 507d6fa4c7..950bfba61e 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
@@ -36,7 +36,7 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
+ %additional_Properties%,
("definition"="NOT_A_DEFINITION"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
index c7f127b3e6..6f64c800f6 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
index 6d77dab57e..fb2d3db534 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
-("container"="playground"),
-("definition"="avro-data/reviews"),
+%additional_Properties%,
+("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
index 052b1272ad..285d19c998 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
index 17433a5a8e..d36da08ae8 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_name_comment.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
index c8d7d212fa..d6d9dec57d 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
@@ -34,8 +34,8 @@ CREATE TYPE AvroType as {
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
index 654a84aa1f..dd5c56d1c2 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
@@ -30,8 +30,8 @@ CREATE TYPE TestType AS {
CREATE EXTERNAL DATASET Name(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/name/{name:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/name/{name:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
index be3d97fe74..b835823ce4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
@@ -26,8 +26,8 @@ CREATE TYPE test AS {
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
index b1b1b76045..c947f707ce 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
@@ -27,16 +27,16 @@ CREATE TYPE TestType AS {
CREATE EXTERNAL DATASET Department(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/department/{department:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/department/{department:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
CREATE EXTERNAL DATASET LastName(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/last-name/{name.last:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/last-name/{name.last:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
index 2c5b352c8b..531e3981a3 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
@@ -30,8 +30,8 @@ CREATE TYPE TestType AS {
CREATE EXTERNAL DATASET Name(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/nested-value/{name:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/nested-value/{name:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
index 5fcfff5f30..173d1d0978 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
@@ -27,16 +27,16 @@ CREATE TYPE TestType AS {
CREATE EXTERNAL DATASET Department(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/department/{department:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/department/{department:string}"),
("embed-filter-values" = "false"),
("format"="avro")
);
CREATE EXTERNAL DATASET LastName(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/last-name/{name.last:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/last-name/{name.last:string}"),
("embed-filter-values" = "false"),
("format"="avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
index 4c418b480e..49cc3e6de8 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
@@ -27,24 +27,24 @@ CREATE TYPE test AS {
CREATE EXTERNAL DATASET company(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}"),
("embed-filter-values" = "false"),
("format"="avro")
);
CREATE EXTERNAL DATASET customer(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}"),
("embed-filter-values" = "false"),
("format"="avro")
);
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "false"),
("format"="avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
index 13316e854e..073965b66c 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
@@ -26,8 +26,8 @@ CREATE TYPE test AS {
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "false"),
("format"="avro")
);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
index 3c2b513116..495acc88ff 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
@@ -26,8 +26,8 @@ CREATE TYPE test AS {
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "true"),
("format"="avro")
);
@@ -39,8 +39,8 @@ SELECT customer_id, year, month, day, company
CREATE EXTERNAL DATASET maintenance2(test) USING %adapter% (
%template%,
- ("container"="playground"),
-
("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customerId:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+
("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customerId:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "true"),
("format"="avro")
);
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index c8a6785a20..ff1b325c9f 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -558,72 +558,96 @@
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-map">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/avro-types/avro-map</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-nested-records">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir
compare="Text">common/avro/avro-types/avro-nested-records</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-primitives">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir
compare="Text">common/avro/avro-types/avro-primitives</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-union">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir
compare="Text">common/avro/avro-types/avro-union</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/type-mismatch">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/type-mismatch</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/string-standard-utf8">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir
compare="Text">common/avro/string-standard-utf8</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/select-all-fields">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/select-all-fields</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/select-count-one-field">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir
compare="Text">common/avro/select-count-one-field</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/array-access">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/array-access</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/field-access">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/field-access</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/heterogeneous-access">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir
compare="Text">common/avro/heterogeneous-access</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/invalid-avro-files">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">none</output-dir>
<expected-error>External source error.
org.apache.avro.InvalidAvroMagicException: Not an Avro data
file.</expected-error>
<source-location>false</source-location>
@@ -632,6 +656,8 @@
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/invalid-type">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">none</output-dir>
<expected-error>ASX3123: Type 'AvroType' contains declared fields,
which is not supported for 'avro' format</expected-error>
</compilation-unit>
@@ -639,24 +665,32 @@
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/missing-fields">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/missing-fields</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/multi-file-multi-schema">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir
compare="Text">common/avro/multi-file-multi-schema</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/no-files">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/no-files</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/object-concat">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">common/avro/object-concat</output-dir>
</compilation-unit>
</test-case>
@@ -891,48 +925,64 @@
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="one-field">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">one-field</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="query">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">query</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-one-value">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">embed-one-value</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-multiple-values">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">embed-multiple-values</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-flat">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">embed-flat</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-with-nested-values">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">embed-with-nested-values</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="using-limit">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">using-limit</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="views">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" =
"playground")' />
<output-dir compare="Text">views</output-dir>
</compilation-unit>
</test-case>
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
index 6851433fca..b178efd99f 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
@@ -359,4 +359,212 @@
<!-- </compilation-unit>-->
<!-- </test-case>-->
</test-group>
+ <test-group name="avro">
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-map">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/avro-types/avro-map</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-nested-records">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir
compare="Text">common/avro/avro-types/avro-nested-records</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-primitives">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir
compare="Text">common/avro/avro-types/avro-primitives</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-union">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir
compare="Text">common/avro/avro-types/avro-union</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/type-mismatch">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/type-mismatch</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/string-standard-utf8">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir
compare="Text">common/avro/string-standard-utf8</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/select-all-fields">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/select-all-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/select-count-one-field">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir
compare="Text">common/avro/select-count-one-field</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/array-access">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/array-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/field-access">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/field-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/heterogeneous-access">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir
compare="Text">common/avro/heterogeneous-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/invalid-avro-files">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">none</output-dir>
+ <expected-error>External source error. org.apache.avro.InvalidAvroMagicException: Not an Avro data file</expected-error>
+ <source-location>false</source-location>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/invalid-type">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">none</output-dir>
+ <expected-error>ASX3123: Type 'AvroType' contains declared fields, which is not supported for 'avro' format</expected-error>
+ <source-location>false</source-location>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/missing-fields">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/missing-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/multi-file-multi-schema">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir
compare="Text">common/avro/multi-file-multi-schema</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/no-files">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/no-files</output-dir>
+ <expected-error>ASX1114: The provided external dataset configuration returned no files from the external source</expected-error>
+ <source-location>false</source-location>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/object-concat">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">common/avro/object-concat</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="one-field">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">one-field</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="query">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">query</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-one-value">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">embed-one-value</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-multiple-values">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">embed-multiple-values</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-flat">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">embed-flat</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-with-nested-values">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">embed-with-nested-values</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="using-limit">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">using-limit</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="views">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" =
"avro-input-format")' />
+ <output-dir compare="Text">views</output-dir>
+ </compilation-unit>
+ </test-case>
+ </test-group>
</test-suite>
diff --git a/asterixdb/asterix-external-data/pom.xml
b/asterixdb/asterix-external-data/pom.xml
index 27c3ac1c38..e1cdd3a880 100644
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@ -598,6 +598,11 @@
<artifactId>delta-standalone_2.12</artifactId>
<version>3.0.0</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-mapred</artifactId>
+ <version>1.12.0</version>
+ </dependency>
</dependencies>
<!-- apply patch for HADOOP-17225 to workaround CVE-2019-10172 -->
<repositories>
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
index 934ba1dfab..8af73428bb 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -45,6 +45,7 @@ import org.apache.asterix.external.api.IRecordReaderFactory;
import
org.apache.asterix.external.input.filter.embedder.IExternalFilterValueEmbedder;
import
org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory;
import org.apache.asterix.external.input.record.reader.hdfs.HDFSRecordReader;
+import org.apache.asterix.external.input.record.reader.hdfs.avro.AvroFileRecordReader;
import
org.apache.asterix.external.input.record.reader.hdfs.parquet.ParquetFileRecordReader;
import
org.apache.asterix.external.input.record.reader.stream.StreamRecordReader;
import org.apache.asterix.external.input.stream.HDFSInputStream;
@@ -54,6 +55,7 @@ import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataPrefix;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.external.util.HDFSUtils;
+import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
@@ -197,6 +199,8 @@ public class HDFSDataSourceFactory implements
IRecordReaderFactory<Object>, IExt
reader.close();
} else if
(formatString.equals(ExternalDataConstants.FORMAT_PARQUET)) {
recordClass = IValueReference.class;
+ } else if (formatString.equals(ExternalDataConstants.FORMAT_AVRO)) {
+ recordClass = GenericRecord.class;
} else {
recordReaderClazz =
StreamRecordReaderProvider.getRecordReaderClazz(configuration);
this.recordClass = char[].class;
@@ -356,6 +360,9 @@ public class HDFSDataSourceFactory implements
IRecordReaderFactory<Object>, IExt
if (configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim()
.equals(ExternalDataConstants.INPUT_FORMAT_PARQUET)) {
return new ParquetFileRecordReader<>(read, inputSplits,
readSchedule, nodeName, conf, context, ugi);
+ } else if (configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim()
+ .equals(ExternalDataConstants.INPUT_FORMAT_AVRO)) {
+ return new AvroFileRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, context, ugi);
} else {
return new HDFSRecordReader<>(read, inputSplits, readSchedule,
nodeName, conf, ugi);
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
index 04c80d1d72..352f118979 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
@@ -86,14 +86,11 @@ public abstract class AbstractHDFSRecordReader<K, V>
implements IRecordReader<V>
nextInputSplit();
}
- if (reader.next(key, value)) {
- return true;
- }
- while (nextInputSplit()) {
- if (reader.next(key, value)) {
+ do {
+ if (readerHasNext()) {
return true;
}
- }
+ } while (nextInputSplit());
return false;
}
@@ -103,6 +100,10 @@ public abstract class AbstractHDFSRecordReader<K, V>
implements IRecordReader<V>
return record;
}
+ protected boolean readerHasNext() throws IOException { // overridable step the hasNext() do/while loop uses to ask the current reader for a record
+ return reader.next(key, value); // delegates to reader.next(key, value) and returns its result unchanged
+ }
+
private boolean nextInputSplit() throws IOException {
for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
/**
@@ -128,14 +129,18 @@ public abstract class AbstractHDFSRecordReader<K, V>
implements IRecordReader<V>
continue;
}
- reader.close();
- reader = getRecordReader(currentSplitIndex);
+ closeRecordReader();
+ setRecordReader(currentSplitIndex);
return true;
}
}
return false;
}
+ protected void closeRecordReader() throws IOException { // overridable step nextInputSplit() calls before setRecordReader(currentSplitIndex)
+ reader.close(); // closes the reader currently held in the 'reader' field
+ }
+
/**
* Returns true if need to go to next split without closing the current
reader
*
@@ -143,7 +148,7 @@ public abstract class AbstractHDFSRecordReader<K, V>
implements IRecordReader<V>
*/
protected abstract boolean onNextInputSplit() throws IOException;
- protected abstract RecordReader<K, V> getRecordReader(int splitIndex) throws IOException;
+ protected abstract void setRecordReader(int splitIndex) throws IOException;
@Override
public boolean stop() {
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
index 6f338c42d6..f143188fcb 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
@@ -37,7 +37,7 @@ public class HDFSRecordReader<K, V extends Writable> extends
AbstractHDFSRecordR
}
@Override
- protected RecordReader<K, V> getRecordReader(int splitIndex) throws IOException {
+ protected void setRecordReader(int splitIndex) throws IOException {
try {
reader = ugi == null ? getReader(splitIndex)
: ugi.doAs((PrivilegedExceptionAction<RecordReader<K, V>>)
() -> getReader(splitIndex));
@@ -48,7 +48,6 @@ public class HDFSRecordReader<K, V extends Writable> extends
AbstractHDFSRecordR
key = reader.createKey();
value = reader.createValue();
}
- return reader;
}
@SuppressWarnings("unchecked")
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
new file mode 100644
index 0000000000..3c279a4ea1
--- /dev/null
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.hdfs.avro;
+
+import org.apache.avro.mapred.AvroInputFormat;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.SplittableCompressionCodec;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+
+/**
+ * Avro input format that decides whether a file may be split from the Hadoop
+ * compression codec resolved for that file.
+ * <p>
+ * The codec lookup is built in {@link #configure(JobConf)}. A file with no resolved codec,
+ * or with a {@link SplittableCompressionCodec}, is reported as splittable.
+ */
+public class AvroFileInputFormat<T> extends AvroInputFormat<T> implements JobConfigurable {
+    /** Codec lookup built from the job configuration; {@code null} until {@link #configure(JobConf)} runs. */
+    private CompressionCodecFactory compressionCodecs = null;
+
+    /**
+     * Builds the compression codec lookup from the job configuration.
+     *
+     * @param conf job configuration handed to {@link CompressionCodecFactory}
+     */
+    @Override
+    public void configure(JobConf conf) {
+        compressionCodecs = new CompressionCodecFactory(conf);
+    }
+
+    /**
+     * Reports whether {@code file} may be divided into multiple input splits.
+     *
+     * @param fs   file system holding the file
+     * @param file path of the file being considered
+     * @return {@code true} when no codec is resolved for the file or the resolved codec is a
+     *         {@link SplittableCompressionCodec}; {@code false} otherwise
+     */
+    @Override
+    public boolean isSplitable(FileSystem fs, Path file) {
+        if (compressionCodecs == null) {
+            // configure(JobConf) has not run, so there is no codec lookup to consult;
+            // defer to the parent instead of failing with a NullPointerException.
+            return super.isSplitable(fs, file);
+        }
+        final CompressionCodec codec = compressionCodecs.getCodec(file);
+        return codec == null || codec instanceof SplittableCompressionCodec;
+    }
+}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
new file mode 100644
index 0000000000..e5056663bb
--- /dev/null
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.hdfs.avro;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.PrivilegedExceptionAction;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
+import org.apache.asterix.external.api.IExternalDataRuntimeContext;
+import org.apache.asterix.external.api.IRawRecord;
+import
org.apache.asterix.external.input.filter.embedder.IExternalFilterValueEmbedder;
+import org.apache.asterix.external.input.record.GenericRecord;
+import
org.apache.asterix.external.input.record.reader.hdfs.AbstractHDFSRecordReader;
+import org.apache.asterix.external.input.record.reader.hdfs.EmptyRecordReader;
+import org.apache.avro.InvalidAvroMagicException;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.util.ExceptionUtils;
+
+/**
+ * Record reader that produces Avro datums from HDFS input splits.
+ * <p>
+ * Two read paths exist, selected per split by the file name:
+ * <ul>
+ * <li>paths ending in {@code .gz} or {@code .gzip} (case-insensitive) are opened directly from the
+ * file system, unwrapped with {@link GZIPInputStream} and decoded with a {@link DataFileStream};</li>
+ * <li>every other path is read through the {@link RecordReader} supplied by the configured input format.</li>
+ * </ul>
+ *
+ * @param <T> the Avro datum type handed to the parser
+ */
+public class AvroFileRecordReader<T> extends AbstractHDFSRecordReader<Void, T> {
+
+    private AvroWrapper<T> key;
+    private NullWritable value;
+    private final IRawRecord<T> record;
+    private RecordReader<AvroWrapper<T>, NullWritable> reader;
+    private final IExternalFilterValueEmbedder valueEmbedder;
+    // True while the current split is being read through the gzip/DataFileStream path.
+    private boolean isCompressed = false;
+    // Resources of the gzip path; each stays null until it has been opened successfully.
+    private DataFileStream<T> dataFileStream;
+    private FileSystem fs;
+    private InputStream in;
+
+    public AvroFileRecordReader(boolean[] read, InputSplit[] inputSplits, String[] readSchedule, String nodeName,
+            JobConf conf, IExternalDataRuntimeContext context, UserGroupInformation ugi) {
+        super(read, inputSplits, readSchedule, nodeName, conf, ugi);
+        reader = new EmptyRecordReader<>();
+        record = new GenericRecord<>();
+        valueEmbedder = context.getValueEmbedder();
+    }
+
+    /**
+     * No per-split hook work is done by this reader.
+     *
+     * @return always {@code false}; the meaning of the flag is defined by the parent class
+     */
+    @Override
+    protected boolean onNextInputSplit() {
+        return false;
+    }
+
+    /**
+     * Opens the reader for the split at {@code splitIndex}, choosing the gzip path or the
+     * input-format path from the file name, and registers the path with the value embedder.
+     *
+     * @param splitIndex index into {@code inputSplits} of the split to open
+     * @throws IOException if the underlying file or reader cannot be opened; an interrupted
+     *         privileged call is rethrown as a {@link HyracksDataException} and an invalid Avro
+     *         magic header as a {@link RuntimeDataException} with {@code EXTERNAL_SOURCE_ERROR}
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    protected void setRecordReader(int splitIndex) throws IOException {
+        try {
+            String filePath = getPath(inputSplits[splitIndex]);
+            valueEmbedder.setPath(filePath);
+            if (StringUtils.endsWithIgnoreCase(filePath, ".gz") || StringUtils.endsWithIgnoreCase(filePath, ".gzip")) {
+                // Flag first so that a later close releases whatever part of the chain was opened.
+                isCompressed = true;
+                openCompressed(filePath);
+            } else {
+                isCompressed = false;
+                reader = (RecordReader<AvroWrapper<T>, NullWritable>) (ugi == null
+                        ? inputFormat.getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL)
+                        : ugi.doAs((PrivilegedExceptionAction<?>) () -> inputFormat
+                                .getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL)));
+                if (key == null) {
+                    key = reader.createKey();
+                    value = reader.createValue();
+                }
+            }
+        } catch (InterruptedException ex) {
+            throw HyracksDataException.create(ex);
+        } catch (InvalidAvroMagicException ex) {
+            throw RuntimeDataException.create(ErrorCode.EXTERNAL_SOURCE_ERROR, ex,
+                    ExceptionUtils.getMessageOrToString(ex));
+        }
+    }
+
+    /**
+     * Opens {@code filePath} as a gzip-compressed Avro container file. If wrapping the raw
+     * stream fails, the raw stream is closed before the failure is rethrown so it does not leak;
+     * the file system handle is released by {@link #closeRecordReader()} / {@link #close()}.
+     */
+    private void openCompressed(String filePath) throws IOException, InterruptedException {
+        fs = ugi == null ? FileSystem.get(conf)
+                : ugi.doAs((PrivilegedExceptionAction<FileSystem>) () -> FileSystem.get(conf));
+        InputStream rawStream = fs.open(new Path(filePath));
+        try {
+            in = new GZIPInputStream(rawStream);
+            GenericDatumReader<T> datumReader = new GenericDatumReader<>();
+            dataFileStream = new DataFileStream<>(in, datumReader);
+        } catch (IOException | RuntimeException e) {
+            try {
+                rawStream.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Releases the resources of the reader that is currently open: the gzip chain when the
+     * current split is compressed, otherwise the input-format record reader.
+     *
+     * @throws IOException if closing any resource fails
+     */
+    @Override
+    protected void closeRecordReader() throws IOException {
+        if (isCompressed) {
+            closeCompressedResources();
+        } else {
+            reader.close();
+        }
+    }
+
+    /**
+     * Closes this reader. Performs the same cleanup as {@link #closeRecordReader()}.
+     *
+     * @throws IOException if closing any resource fails
+     */
+    @Override
+    public void close() throws IOException {
+        closeRecordReader();
+    }
+
+    /**
+     * Closes the gzip-path resources that were actually opened. References are cleared up front
+     * so a repeated call is a no-op, and the closes are chained with {@code finally} so a failure
+     * on one resource does not prevent the remaining ones from being released.
+     */
+    private void closeCompressedResources() throws IOException {
+        DataFileStream<T> openStream = dataFileStream;
+        InputStream openInput = in;
+        FileSystem openFs = fs;
+        dataFileStream = null;
+        in = null;
+        fs = null;
+        try {
+            if (openStream != null) {
+                openStream.close();
+            }
+        } finally {
+            try {
+                if (openInput != null) {
+                    openInput.close();
+                }
+            } finally {
+                if (openFs != null) {
+                    openFs.close();
+                }
+            }
+        }
+    }
+
+    /**
+     * @return {@code true} if the current split has another record. On the input-format path this
+     *         call also advances the reader and loads the record into {@code key}.
+     * @throws IOException if the underlying reader fails
+     */
+    @Override
+    protected boolean readerHasNext() throws IOException {
+        if (isCompressed) {
+            return dataFileStream.hasNext();
+        } else {
+            return reader.next(key, value);
+        }
+    }
+
+    /**
+     * Returns the shared record holder populated with the next datum. The same
+     * {@link IRawRecord} instance is reused on every call.
+     *
+     * @return the reusable record wrapping the current datum
+     * @throws IOException if reading from the underlying stream fails
+     */
+    @Override
+    public IRawRecord<T> next() throws IOException {
+        if (isCompressed) {
+            record.set(dataFileStream.next());
+        } else {
+            // The datum was already loaded into key by readerHasNext().
+            record.set(key.datum());
+        }
+        return record;
+    }
+
+    /**
+     * @return the file path of a {@link FileSplit}, or the split's {@code toString()} for any
+     *         other split type
+     */
+    private String getPath(InputSplit split) {
+        if (split instanceof FileSplit) {
+            return ((FileSplit) split).getPath().toString();
+        } else {
+            return split.toString();
+        }
+    }
+}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
index c11885a18d..1d123b61d1 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
@@ -62,7 +62,7 @@ public class ParquetFileRecordReader<V extends
IValueReference> extends Abstract
@SuppressWarnings("unchecked")
@Override
- protected RecordReader<Void, V> getRecordReader(int splitIndex) throws
IOException {
+ protected void setRecordReader(int splitIndex) throws IOException {
try {
ParquetRecordReaderWrapper readerWrapper = ugi == null ?
getReader(splitIndex)
:
ugi.doAs((PrivilegedExceptionAction<ParquetRecordReaderWrapper>) () ->
getReader(splitIndex));
@@ -75,7 +75,6 @@ public class ParquetFileRecordReader<V extends
IValueReference> extends Abstract
if (value == null) {
value = reader.createValue();
}
- return reader;
}
private ParquetRecordReaderWrapper getReader(int splitIndex) throws
IOException {
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index d487e6868f..46a1b5b7a4 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -24,6 +24,7 @@ import java.util.function.LongSupplier;
import java.util.function.Supplier;
import java.util.regex.Pattern;
+import
org.apache.asterix.external.input.record.reader.hdfs.avro.AvroFileInputFormat;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.hyracks.util.StorageUtil;
@@ -176,16 +177,21 @@ public class ExternalDataConstants {
public static final String CLASS_NAME_SEQUENCE_INPUT_FORMAT =
"org.apache.hadoop.mapred.SequenceFileInputFormat";
public static final String CLASS_NAME_PARQUET_INPUT_FORMAT =
"org.apache.asterix.external.input.record.reader.hdfs.parquet.MapredParquetInputFormat";
+ public static final String CLASS_NAME_AVRO_INPUT_FORMAT =
AvroFileInputFormat.class.getName();
public static final String CLASS_NAME_HDFS_FILESYSTEM =
"org.apache.hadoop.hdfs.DistributedFileSystem";
public static final String S3A_CHANGE_DETECTION_REQUIRED =
"requireVersionChangeDetection";
public static final String S3A_CHANGE_DETECTION_REQUIRED_CONFIG_KEY =
"fs.s3a.change.detection.version.required";
public static final String HDFS_IO_COMPRESSION_CODECS_KEY =
"io.compression.codecs";
+ public static final String HDFS_AVRO_IGNORE_INPUTS_WITHOUT_EXTENSION =
+ "avro.mapred.ignore.inputs.without.extension";
+
/**
* input formats aliases
*/
public static final String INPUT_FORMAT_TEXT = "text-input-format";
public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
public static final String INPUT_FORMAT_PARQUET = "parquet-input-format";
+ public static final String INPUT_FORMAT_AVRO = "avro-input-format";
public static final String HDFS_BLOCKSIZE = "blocksize";
public static final String HDFS_REPLICATION = "replication";
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 540de09b8e..c7deb7c762 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -1139,6 +1139,7 @@ public class ExternalDataUtils {
protocol = nodePathPair[0];
break;
case ExternalDataConstants.KEY_ADAPTER_NAME_HDFS:
+ // Remove trailing slashes from the HDFS URL, since prefixes/paths in HDFS already start with a slash (absolute paths)
return
configurations.get(ExternalDataConstants.KEY_HDFS_URL).replaceAll("/+$", "");
default:
return "";
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index 35f2a9453f..7c7e031768 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -203,6 +203,8 @@ public class HDFSUtils {
return ExternalDataConstants.CLASS_NAME_SEQUENCE_INPUT_FORMAT;
case ExternalDataConstants.INPUT_FORMAT_PARQUET:
return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT;
+ case ExternalDataConstants.INPUT_FORMAT_AVRO:
+ return ExternalDataConstants.CLASS_NAME_AVRO_INPUT_FORMAT;
default:
return inputFormatParameter;
}
@@ -252,6 +254,9 @@ public class HDFSUtils {
if
(ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT.equals(formatClassName))
{
configureParquet(configuration, conf);
+ } else if
(ExternalDataConstants.CLASS_NAME_AVRO_INPUT_FORMAT.equals(formatClassName)) {
+
conf.set(ExternalDataConstants.HDFS_AVRO_IGNORE_INPUTS_WITHOUT_EXTENSION,
ExternalDataConstants.FALSE);
+ disableHadoopFileSystemCache(conf,
ExternalDataConstants.READER_HDFS);
}
if
(configuration.containsKey(ExternalDataConstants.S3A_CHANGE_DETECTION_REQUIRED))
{