This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 4a05eaf98 IMPALA-11807: Fix TestIcebergTable.test_avro_file_format and
test_mixed_file_format
4a05eaf98 is described below
commit 4a05eaf988f3a613ff86b934dd077c80070b4ca0
Author: noemi <[email protected]>
AuthorDate: Wed Dec 21 15:07:42 2022 +0100
IMPALA-11807: Fix TestIcebergTable.test_avro_file_format and
test_mixed_file_format
Iceberg hardcodes URIs in metadata files. If the table was written
in a certain storage location and then moved to another file system,
the hardcoded URIs will still point to the old location instead of
the current one. Therefore Impala will be unable to read the table.
TestIcebergTable.test_avro_file_format and test_mixed_file_format
use Hive from Impala to write tables. If the tables are created in
a different file system than the one they will be read from, the tests
fail due to the invalid URIs.
This change skips these 2 tests when testing is not done on HDFS.
It also updates the data load schema of the 2 test tables created by
Hive and sets LOCATION the same way as in the previous test tables. If
this later makes it possible to rewrite the URIs in the metadata, so
that the tables become accessible from another file system as well,
then the tests can be enabled again.
Testing:
- Tested locally on an HDFS minicluster
- Triggered an Ozone build to verify that the tests are skipped on a
different file system
Change-Id: Ie2f126de80c6e7f825d02f6814fcf69ae320a781
Reviewed-on: http://gerrit.cloudera.org:8080/19387
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
testdata/datasets/functional/functional_schema_template.sql | 6 ++++--
tests/common/skip.py | 2 +-
tests/query_test/test_iceberg.py | 2 ++
3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/testdata/datasets/functional/functional_schema_template.sql
b/testdata/datasets/functional/functional_schema_template.sql
index 9a3d1417c..08cb1a4aa 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3616,7 +3616,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS
{db_name}{db_suffix}.{table_name} (
double_col double,
bool_col boolean
)
-STORED BY ICEBERG STORED AS AVRO;
+STORED BY ICEBERG STORED AS AVRO
+LOCATION '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_format';
INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'A', 0.5,
true),(2, 'B', 1.5, true),(3, 'C', 2.5, false);
====
---- DATASET
@@ -3631,10 +3632,11 @@ CREATE EXTERNAL TABLE IF NOT EXISTS
{db_name}{db_suffix}.{table_name} (
bool_col boolean
)
STORED BY ICEBERG
-TBLPROPERTIES('write.format.default'='avro');
+LOCATION
'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_mixed_file_format';
---- DEPENDENT_LOAD_HIVE
-- This INSERT must run in Hive, because Impala doesn't support inserting into
tables
-- with avro and orc file formats.
+ALTER TABLE {db_name}{db_suffix}.{table_name} SET
TBLPROPERTIES('write.format.default'='avro');
INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'avro', 0.5,
true);
ALTER TABLE {db_name}{db_suffix}.{table_name} SET
TBLPROPERTIES('write.format.default'='orc');
INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(2, 'orc', 1.5,
false);
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 85d2c678d..648204395 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -104,7 +104,7 @@ class SkipIf:
sfs_unsupported = pytest.mark.skipif(not (IS_HDFS or IS_S3 or IS_ABFS or
IS_ADLS
or IS_GCS), reason="Hive support for sfs+ is limited, HIVE-26757")
hardcoded_uris = pytest.mark.skipif(not IS_HDFS,
- reason="Iceberg delete files hardcode the full URI in parquet files")
+ reason="Iceberg hardcodes the full URI in parquet delete files and
metadata files")
not_ec = pytest.mark.skipif(not IS_EC, reason="Erasure Coding needed")
no_secondary_fs = pytest.mark.skipif(not SECONDARY_FILESYSTEM,
reason="Secondary filesystem needed")
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index fe41d2487..5addca6ec 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -786,6 +786,7 @@ class TestIcebergTable(IcebergTestSuite):
self.run_test_case('QueryTest/iceberg-multiple-storage-locations-table',
vector, unique_database)
+ @SkipIf.hardcoded_uris
def test_mixed_file_format(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
unique_database)
@@ -901,6 +902,7 @@ class TestIcebergTable(IcebergTestSuite):
def test_virtual_columns(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-virtual-columns', vector,
unique_database)
+ @SkipIf.hardcoded_uris
def test_avro_file_format(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-avro', vector, unique_database)