IMPALA-3314: Fix Avro schema loading for partitioned tables. Bug: Commit 6f31c7 fixed a crash when setting Avro schemas for tables whose storage was altered to the Avro file format. However, the fix was incomplete for partitioned/multi-file-format tables because 'hasAvroData_' is not set in all code paths that load partitioned tables (for example, HdfsTable#loadAllPartitions()).
Fix: Moved the code for setting 'hasAvroData_' to addPartition() which is the common logic for all code paths adding new partitions. Also fixed the test coverage gap by adding a new test for partitioned tables altered to Avro format. Change-Id: I7854ff002b2277ec4a5388216218a1d5ad142de8 Reviewed-on: http://gerrit.cloudera.org:8080/5388 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bb633393 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bb633393 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bb633393 Branch: refs/heads/hadoop-next Commit: bb633393775691807843a2b6bac28b1750c2c5da Parents: f83652c Author: Bharath Vissapragada <[email protected]> Authored: Tue Dec 6 14:48:49 2016 -0800 Committer: Internal Jenkins <[email protected]> Committed: Wed Dec 7 09:45:11 2016 +0000 ---------------------------------------------------------------------- .../org/apache/impala/catalog/HdfsTable.java | 2 +- .../queries/QueryTest/avro-schema-changes.test | 38 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb633393/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 386ef79..ae5e811 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -894,6 +894,7 @@ public class HdfsTable extends Table { throw new CatalogException(String.format("Partition %s already exists in table %s", partition.getPartitionName(), getFullName())); } + if 
(partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true; partitionMap_.put(partition.getId(), partition); totalHdfsBytes_ += partition.getSize(); numHdfsFiles_ += partition.getNumFileDescriptors(); @@ -1430,7 +1431,6 @@ public class HdfsTable extends Table { // If the partition is null, its HDFS path does not exist, and it was not added to // this table's partition list. Skip the partition. if (partition == null) continue; - if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true; if (msPartition.getParameters() != null) { partition.setNumRows(getRowCount(msPartition.getParameters())); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb633393/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test index 8233a02..14f0549 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test +++ b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test @@ -39,6 +39,44 @@ select count(*) from alltypesagg_staleschema bigint ==== ---- QUERY +# Same as above but for partitioned tables. 
+CREATE EXTERNAL TABLE alltypesagg_staleschema_part ( + id INT, + bool_col BOOLEAN, + tinyint_col INT, + smallint_col INT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_string_col STRING, + string_col STRING, + timestamp_col STRING +) partitioned by (part_col int) +TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json') +==== +---- QUERY +alter table alltypesagg_staleschema_part add partition (part_col=1) location '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap' +==== +---- QUERY +alter table alltypesagg_staleschema_part partition (part_col=1) set fileformat avro +==== +---- QUERY +select count(*) from alltypesagg_staleschema_part +---- CATCH +Missing Avro schema in scan node. This could be due to stale metadata. +==== +---- QUERY +invalidate metadata alltypesagg_staleschema_part +==== +---- QUERY +select count(*) from alltypesagg_staleschema_part +---- RESULTS +11000 +---- TYPES +bigint +==== +---- QUERY # IMPALA-3092. Create an Avro table without column definitions and add columns via ALTER # TABLE. Querying the table should work. CREATE EXTERNAL TABLE avro_alter_table_add_new_column (
