Repository: incubator-impala Updated Branches: refs/heads/master 140220323 -> 893e6f498
CDH-41511: Fix the preconditions check in HdfsTable#setAvroSchema() In HdfsTable#setAvroSchema(), we rely on an incorrect precondition check (!nonPartFieldSchemas_.isEmpty()) to make sure loadSchema() has been called. However, nonPartFieldSchemas_ can still be empty for Avro tables created using older versions of Hive. We take care of this in the schema reconciliation step, where we read the Avro schema to populate the column list. This incorrect check prevents the Catalog from reaching the schema reconciliation code path. Fix: Added a new flag, isSchemaLoaded_, which setAvroSchema() now uses to check that the schema has been loaded properly. Testing: I couldn't reproduce the issue directly by creating Avro tables without any columns. Instead, I manually deleted the table's entries from the "COLUMNS_V2" table in the HMS database to make sure the incorrect precondition check was hit. With this patch applied, Impala loads the table correctly and the table can be queried. Change-Id: I02b6852385d4f4b470f763308817991e60557060 Reviewed-on: http://gerrit.cloudera.org:8080/3446 Reviewed-by: Bharath Vissapragada <[email protected]> Reviewed-by: Marcel Kornacker <[email protected]> Reviewed-by: Dan Hecht <[email protected]> Tested-by: Bharath Vissapragada <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/893e6f49 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/893e6f49 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/893e6f49 Branch: refs/heads/master Commit: 893e6f498b3743de71735eb1dd0178382b80c3bd Parents: 1402203 Author: Bharath Vissapragada <[email protected]> Authored: Tue Jun 21 23:31:39 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Wed Jun 22 11:38:46 2016 -0700 ---------------------------------------------------------------------- fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 
deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/893e6f49/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java index 6b70a40..1457bb6 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java @@ -207,6 +207,10 @@ public class HdfsTable extends Table { // operation), when only non-partition columns are required. private final List<FieldSchema> nonPartFieldSchemas_ = Lists.newArrayList(); + // Flag to check if the table schema has been loaded. Used as a precondition + // for setAvroSchema(). + private boolean isSchemaLoaded_ = false; + private final static Logger LOG = LoggerFactory.getLogger(HdfsTable.class); // Caching this configuration object makes calls to getFileSystem much quicker @@ -1255,7 +1259,7 @@ public class HdfsTable extends Table { */ private void setAvroSchema(HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws Exception { - Preconditions.checkState(!nonPartFieldSchemas_.isEmpty()); + Preconditions.checkState(isSchemaLoaded_); String inputFormat = msTbl.getSd().getInputFormat(); if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO || hasAvroData_) { @@ -1339,6 +1343,7 @@ public class HdfsTable extends Table { addColumnsFromFieldSchemas(msTbl.getPartitionKeys()); addColumnsFromFieldSchemas(nonPartFieldSchemas_); loadAllColumnStats(client); + isSchemaLoaded_ = true; } /**
