IMPALA-5325: Do not update totalHdfsBytes_/numHdfsFiles_ on Catalogd

We need not account these hdfs table metrics on the Catalog
server as they are eventually calculated again on the Impalads
while unpacking the corresponding thrift table.

This fix can potentially affect the frontend tests that directly load
the Catalog's version of HdfsTable without the loadFromThrift() call
paths that do the accounting. That is fixed by adding a separate call
that computes these values and is called from
ImpaladTestCatalog.getTable().

Testing: Enough tests already cover these code paths like show stats/
table or partition refresh tests etc. No new tests are added.

Change-Id: I03cc6f9e9b2c03cafb87029ea0802dfdb2745be1
Reviewed-on: http://gerrit.cloudera.org:8080/6970
Reviewed-by: Bharath Vissapragada <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5d59d854
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5d59d854
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5d59d854

Branch: refs/heads/master
Commit: 5d59d854d0274c89debb6f654f5cb3a0d2ef48e5
Parents: 4e54979
Author: Bharath Vissapragada <[email protected]>
Authored: Tue May 23 17:45:20 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Fri May 26 09:52:46 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/catalog/HdfsTable.java    | 33 +++++++++++++-------
 .../impala/testutil/ImpaladTestCatalog.java     |  7 ++++-
 2 files changed, 27 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d59d854/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java 
b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 23a8c96..d8d8d4a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -165,10 +165,12 @@ public class HdfsTable extends Table {
 
   private HdfsPartitionLocationCompressor partitionLocationCompressor_;
 
-  // Total number of Hdfs files in this table. Set in load().
+  // Total number of Hdfs files in this table. Accounted only in the Impalad 
catalog
+  // cache. Set to -1 on Catalogd.
   private long numHdfsFiles_;
 
-  // Sum of sizes of all Hdfs files in this table. Set in load().
+  // Sum of sizes of all Hdfs files in this table. Accounted only in the 
Impalad
+  // catalog cache. Set to -1 on Catalogd.
   private long totalHdfsBytes_;
 
   // True iff the table's partitions are located on more than one filesystem.
@@ -231,6 +233,21 @@ public class HdfsTable extends Table {
   }
 
   /**
+   * Updates numHdfsFiles_ and totalHdfsBytes_ based on the partition 
information.
+   * This is used only for the frontend tests that do not spawn a separate 
Catalog
+   * instance.
+   */
+  public void computeHdfsStatsForTesting() {
+    Preconditions.checkState(numHdfsFiles_ == -1 && totalHdfsBytes_ == -1);
+    numHdfsFiles_ = 0;
+    totalHdfsBytes_ = 0;
+    for (HdfsPartition partition: partitionMap_.values()) {
+      numHdfsFiles_ += partition.getNumFileDescriptors();
+      totalHdfsBytes_ += partition.getSize();
+    }
+  }
+
+  /**
    * Drops and re-loads the block metadata for all partitions in 'partsByPath' 
whose
    * location is under the given 'dirPath'. It involves the following steps:
    * - Clear the current block metadata of the partitions.
@@ -305,8 +322,6 @@ public class HdfsTable extends Table {
         // Update the partitions' metadata that this file belongs to.
         for (HdfsPartition partition: partitions) {
           partition.getFileDescriptors().add(fd);
-          numHdfsFiles_++;
-          totalHdfsBytes_ += fd.getFileLength();
         }
       }
 
@@ -369,8 +384,6 @@ public class HdfsTable extends Table {
       // Update the partitions' metadata that this file belongs to.
       for (HdfsPartition partition: partitions) {
         partition.getFileDescriptors().add(fd);
-        numHdfsFiles_++;
-        totalHdfsBytes_ += fd.getFileLength();
       }
     }
   }
@@ -726,8 +739,6 @@ public class HdfsTable extends Table {
         newPartSizeBytes += fileStatus.getLen();
       }
       partition.setFileDescriptors(newFileDescs);
-      numHdfsFiles_ += newFileDescs.size();
-      totalHdfsBytes_ += newPartSizeBytes;
     } catch(IOException e) {
       throw new CatalogException("Error loading block metadata for partition " 
+
           partition.toString(), e);
@@ -1072,6 +1083,8 @@ public class HdfsTable extends Table {
       }
       if (loadTableSchema) setAvroSchema(client, msTbl);
       updateStatsFromHmsTable(msTbl);
+      numHdfsFiles_ = -1;
+      totalHdfsBytes_ = -1;
     } catch (TableLoadingException e) {
       throw e;
     } catch (Exception e) {
@@ -1477,10 +1490,6 @@ public class HdfsTable extends Table {
     Path partDirPath = new Path(storageDescriptor.getLocation());
     FileSystem fs = partDirPath.getFileSystem(CONF);
     if (!fs.exists(partDirPath)) return;
-
-    numHdfsFiles_ -= partition.getNumFileDescriptors();
-    totalHdfsBytes_ -= partition.getSize();
-    Preconditions.checkState(numHdfsFiles_ >= 0 && totalHdfsBytes_ >= 0);
     refreshFileMetadata(partition);
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d59d854/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
----------------------------------------------------------------------
diff --git 
a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java 
b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
index b3167ce..fdc64e6 100644
--- a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
+++ b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
@@ -24,6 +24,7 @@ import org.apache.impala.catalog.Db;
 import org.apache.impala.catalog.HdfsCachePool;
 import org.apache.impala.catalog.ImpaladCatalog;
 import org.apache.impala.catalog.Table;
+import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.util.PatternMatcher;
 import com.google.common.base.Preconditions;
 
@@ -86,6 +87,10 @@ public class ImpaladTestCatalog extends ImpaladCatalog {
     Db db = getDb(dbName);
     Preconditions.checkNotNull(db);
     db.addTable(newTbl);
-    return super.getTable(dbName, tableName);
+    Table resultTable = super.getTable(dbName, tableName);
+    if (resultTable instanceof HdfsTable) {
+      ((HdfsTable) resultTable).computeHdfsStatsForTesting();
+    }
+    return resultTable;
   }
 }

Reply via email to