IMPALA-5325: Do not update totalHdfsBytes_/numHdfsFiles_ on Catalogd We need not account these hdfs table metrics on the Catalog server as they are eventually calculated again on the Impalads while unpacking the corresponding thrift table.
This fix can potentially affect the frontend tests that directly load the Catalog's version of HdfsTable without the loadFromThrift() call paths that do the accounting. That is fixed by adding a separate call that computes these values and is called from ImpaladTestCatalog.getTable(). Testing: Enough tests already cover these code paths like show stats/ table or partition refresh tests etc. No new tests are added. Change-Id: I03cc6f9e9b2c03cafb87029ea0802dfdb2745be1 Reviewed-on: http://gerrit.cloudera.org:8080/6970 Reviewed-by: Bharath Vissapragada <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5d59d854 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5d59d854 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5d59d854 Branch: refs/heads/master Commit: 5d59d854d0274c89debb6f654f5cb3a0d2ef48e5 Parents: 4e54979 Author: Bharath Vissapragada <[email protected]> Authored: Tue May 23 17:45:20 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Fri May 26 09:52:46 2017 +0000 ---------------------------------------------------------------------- .../org/apache/impala/catalog/HdfsTable.java | 33 +++++++++++++------- .../impala/testutil/ImpaladTestCatalog.java | 7 ++++- 2 files changed, 27 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d59d854/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 23a8c96..d8d8d4a 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -165,10 +165,12 @@ public class HdfsTable extends Table { private HdfsPartitionLocationCompressor partitionLocationCompressor_; - // Total number of Hdfs files in this table. Set in load(). + // Total number of Hdfs files in this table. Accounted only in the Impalad catalog + // cache. Set to -1 on Catalogd. private long numHdfsFiles_; - // Sum of sizes of all Hdfs files in this table. Set in load(). + // Sum of sizes of all Hdfs files in this table. Accounted only in the Impalad + // catalog cache. Set to -1 on Catalogd. private long totalHdfsBytes_; // True iff the table's partitions are located on more than one filesystem. @@ -231,6 +233,21 @@ public class HdfsTable extends Table { } /** + * Updates numHdfsFiles_ and totalHdfsBytes_ based on the partition information. + * This is used only for the frontend tests that do not spawn a separate Catalog + * instance. + */ + public void computeHdfsStatsForTesting() { + Preconditions.checkState(numHdfsFiles_ == -1 && totalHdfsBytes_ == -1); + numHdfsFiles_ = 0; + totalHdfsBytes_ = 0; + for (HdfsPartition partition: partitionMap_.values()) { + numHdfsFiles_ += partition.getNumFileDescriptors(); + totalHdfsBytes_ += partition.getSize(); + } + } + + /** * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose * location is under the given 'dirPath'. It involves the following steps: * - Clear the current block metadata of the partitions. @@ -305,8 +322,6 @@ public class HdfsTable extends Table { // Update the partitions' metadata that this file belongs to. for (HdfsPartition partition: partitions) { partition.getFileDescriptors().add(fd); - numHdfsFiles_++; - totalHdfsBytes_ += fd.getFileLength(); } } @@ -369,8 +384,6 @@ public class HdfsTable extends Table { // Update the partitions' metadata that this file belongs to. for (HdfsPartition partition: partitions) { partition.getFileDescriptors().add(fd); - numHdfsFiles_++; - totalHdfsBytes_ += fd.getFileLength(); } } } @@ -726,8 +739,6 @@ public class HdfsTable extends Table { newPartSizeBytes += fileStatus.getLen(); } partition.setFileDescriptors(newFileDescs); - numHdfsFiles_ += newFileDescs.size(); - totalHdfsBytes_ += newPartSizeBytes; } catch(IOException e) { throw new CatalogException("Error loading block metadata for partition " + partition.toString(), e); @@ -1072,6 +1083,8 @@ public class HdfsTable extends Table { } if (loadTableSchema) setAvroSchema(client, msTbl); updateStatsFromHmsTable(msTbl); + numHdfsFiles_ = -1; + totalHdfsBytes_ = -1; } catch (TableLoadingException e) { throw e; } catch (Exception e) { @@ -1477,10 +1490,6 @@ public class HdfsTable extends Table { Path partDirPath = new Path(storageDescriptor.getLocation()); FileSystem fs = partDirPath.getFileSystem(CONF); if (!fs.exists(partDirPath)) return; - - numHdfsFiles_ -= partition.getNumFileDescriptors(); - totalHdfsBytes_ -= partition.getSize(); - Preconditions.checkState(numHdfsFiles_ >= 0 && totalHdfsBytes_ >= 0); refreshFileMetadata(partition); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d59d854/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java index b3167ce..fdc64e6 100644 --- a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java +++ b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java @@ -24,6 +24,7 @@ import org.apache.impala.catalog.Db; import org.apache.impala.catalog.HdfsCachePool; import org.apache.impala.catalog.ImpaladCatalog; import org.apache.impala.catalog.Table; +import org.apache.impala.catalog.HdfsTable; import org.apache.impala.util.PatternMatcher; import com.google.common.base.Preconditions; @@ -86,6 +87,10 @@ public class ImpaladTestCatalog extends ImpaladCatalog { Db db = getDb(dbName); Preconditions.checkNotNull(db); db.addTable(newTbl); - return super.getTable(dbName, tableName); + Table resultTable = super.getTable(dbName, tableName); + if (resultTable instanceof HdfsTable) { + ((HdfsTable) resultTable).computeHdfsStatsForTesting(); + } + return resultTable; } }
