This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 731c16c73ab20cdd0d4a03e24a1a898b0489c7ed
Author: Xuebin Su <[email protected]>
AuthorDate: Thu Oct 31 17:28:02 2024 +0800

    IMPALA-13154: Update metrics when loading an HDFS table
    
    Previously, some table metrics, such as the estimated memory usage
    and the number of files, were only updated when a "FULL" Thrift object
    of the table was requested. As a result, if a user ran a DESCRIBE
    command on a table, and then tried to find the table on the Top-N page
    of the web UI, the user would not find it.
    
    This patch fixes the issue by updating the table metrics as soon as
    an HDFS table is loaded. With this, no matter what Thrift object type of
    the table is requested, the metrics will always be updated and
    displayed on the web UI.
    
    Testing:
    - Added two custom cluster tests in test_web_pages.py to make sure that
      table stats can be viewed on the web UI after DESCRIBE, for both
      legacy and local catalog modes.
    
    Change-Id: I6e2eb503b0f61b1e6403058bc5dc78d721e7e940
    Reviewed-on: http://gerrit.cloudera.org:8080/22014
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/catalog/FileMetadataLoader.java  |  11 ++
 .../org/apache/impala/catalog/HdfsPartition.java   |  49 +++++++--
 .../java/org/apache/impala/catalog/HdfsTable.java  | 114 +++++++++------------
 .../impala/catalog/IcebergFileMetadataLoader.java  |  15 ++-
 .../impala/catalog/ParallelFileMetadataLoader.java |   1 +
 .../apache/impala/testutil/ImpaladTestCatalog.java |   3 -
 tests/custom_cluster/test_web_pages.py             |  41 ++++++++
 7 files changed, 152 insertions(+), 82 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java 
b/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java
index a5c44245e..b31053102 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
+import org.apache.impala.catalog.HdfsTable.FileMetadataStats;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.Reference;
 import org.apache.impala.thrift.TNetworkAddress;
@@ -78,6 +79,10 @@ public class FileMetadataLoader {
   protected LoadStats loadStats_;
   protected String debugAction_;
 
+  // File statistics for the partition. It will be initialized each time 
loadInternal()
+  // gets called.
+  protected FileMetadataStats fileMetadataStats_ = null;
+
   /**
    * @param partDir the dir for which to fetch file metadata
    * @param recursive whether to recursively list files
@@ -177,6 +182,7 @@ public class FileMetadataLoader {
   private void loadInternal() throws CatalogException, IOException {
     Preconditions.checkState(loadStats_ == null, "already loaded");
     loadStats_ = new LoadStats(partDir_);
+    fileMetadataStats_ = new FileMetadataStats();
     FileSystem fs = partDir_.getFileSystem(CONF);
 
     // If we don't have any prior FDs from which we could re-use old block 
location info,
@@ -223,6 +229,7 @@ public class FileMetadataLoader {
         FileDescriptor fd = getFileDescriptor(fs, listWithLocations, 
numUnknownDiskIds,
             fileStatus);
         loadedFds_.add(Preconditions.checkNotNull(fd));
+        fileMetadataStats_.accumulate(fd);
       }
       if (writeIds_ != null) {
         loadedInsertDeltaFds_ = new ArrayList<>();
@@ -242,6 +249,10 @@ public class FileMetadataLoader {
     }
   }
 
+  public FileMetadataStats getFileMetadataStats() {
+    return fileMetadataStats_;
+  }
+
   /**
    * Return fd created by the given fileStatus or from the 
cache(oldFdsByPath_).
    */
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java 
b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
index c8a8493f2..32276be55 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.analysis.PartitionKeyValue;
+import org.apache.impala.catalog.HdfsTable.FileMetadataStats;
 import org.apache.impala.catalog.events.InFlightEvents;
 import 
org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventPropertyKey;
 import org.apache.impala.common.FileSystemUtil;
@@ -765,6 +766,9 @@ public class HdfsPartition extends CatalogObjectImpl
   // -1 means there is no previous refresh event happened
   private final long lastRefreshEventId_;
 
+  // File statistics corresponding to the encoded file descriptors.
+  private FileMetadataStats fileMetadataStats_ = null;
+
   /**
    * Constructor.  Needed for third party extensions that want to use their 
own builder
    * to construct the object.
@@ -785,7 +789,7 @@ public class HdfsPartition extends CatalogObjectImpl
         isMarkedCached, accessLevel, hmsParameters, 
cachedMsPartitionDescriptor,
         partitionStats, hasIncrementalStats, numRows, writeId,
         inFlightEvents, /*createEventId=*/-1L, /*lastCompactionId*/-1L,
-        /*lastRefreshEventId*/-1L);
+        /*lastRefreshEventId*/-1L, /*fileMetadataStats*/null);
   }
 
   protected HdfsPartition(HdfsTable table, long id, long prevId, String 
partName,
@@ -804,7 +808,7 @@ public class HdfsPartition extends CatalogObjectImpl
         isMarkedCached, accessLevel, hmsParameters, 
cachedMsPartitionDescriptor,
         partitionStats, hasIncrementalStats, numRows, writeId,
         inFlightEvents, /*createEventId=*/-1L, /*lastCompactionId*/-1L,
-        /*lastRefreshEventId*/-1L);
+        /*lastRefreshEventId*/-1L, /*fileMetadataStats*/null);
   }
 
   protected HdfsPartition(HdfsTable table, long id, long prevId, String 
partName,
@@ -817,7 +821,7 @@ public class HdfsPartition extends CatalogObjectImpl
       CachedHmsPartitionDescriptor cachedMsPartitionDescriptor,
       byte[] partitionStats, boolean hasIncrementalStats, long numRows, long 
writeId,
       InFlightEvents inFlightEvents, long createEventId, long lastCompactionId,
-      long lastRefreshEventId) {
+      long lastRefreshEventId, FileMetadataStats fileMetadataStats) {
     table_ = table;
     id_ = id;
     prevId_ = prevId;
@@ -844,6 +848,7 @@ public class HdfsPartition extends CatalogObjectImpl
     } else {
       partName_ = partName;
     }
+    fileMetadataStats_ = fileMetadataStats;
   }
 
   public long getCreateEventId() { return createEventId_; }
@@ -1063,6 +1068,10 @@ public class HdfsPartition extends CatalogObjectImpl
            encodedDeleteFileDescriptors_.size();
   }
 
+  public FileMetadataStats getFileMetadataStats() {
+    return Preconditions.checkNotNull(fileMetadataStats_);
+  }
+
   @Override
   public boolean hasFileDescriptors() {
     return !encodedFileDescriptors_.isEmpty() ||
@@ -1168,6 +1177,9 @@ public class HdfsPartition extends CatalogObjectImpl
 
   @Override
   public long getSize() {
+    if (fileMetadataStats_ != null) {
+      return fileMetadataStats_.totalFileBytes;
+    }
     long result = 0;
     for (HdfsPartition.FileDescriptor fileDescriptor: getFileDescriptors()) {
       result += fileDescriptor.getFileLength();
@@ -1301,6 +1313,9 @@ public class HdfsPartition extends CatalogObjectImpl
     private long lastRefreshEventId_ = -1L;
     private InFlightEvents inFlightEvents_ = new InFlightEvents();
 
+    // File statistics for the partition, initialized to all zeros.
+    private FileMetadataStats fileMetadataStats_ = new FileMetadataStats();
+
     @Nullable
     private HdfsPartition oldInstance_ = null;
     // True if we are generating a minimal partition instance for
@@ -1369,7 +1384,7 @@ public class HdfsPartition extends CatalogObjectImpl
           encodedDeleteFileDescriptors_, location_, isMarkedCached_, 
accessLevel_,
           hmsParameters_, cachedMsPartitionDescriptor_, partitionStats_,
           hasIncrementalStats_, numRows_, writeId_, inFlightEvents_, 
createEventId_,
-          lastCompactionId_, lastRefreshEventId_);
+          lastCompactionId_, lastRefreshEventId_, fileMetadataStats_);
     }
 
     public Builder setId(long id) {
@@ -1595,6 +1610,7 @@ public class HdfsPartition extends CatalogObjectImpl
     }
 
     public Builder setFileDescriptors(HdfsPartition partition) {
+      fileMetadataStats_.set(partition.getFileMetadataStats());
       encodedFileDescriptors_ = partition.encodedFileDescriptors_;
       encodedInsertFileDescriptors_ = partition.encodedInsertFileDescriptors_;
       encodedDeleteFileDescriptors_ = partition.encodedDeleteFileDescriptors_;
@@ -1622,6 +1638,13 @@ public class HdfsPartition extends CatalogObjectImpl
       return this;
     }
 
+    public void setFileMetadataStats(FileMetadataStats fileMetadataStats) {
+      // The fileMetadataStats will not be shared by more than one 
HdfsPartition even if
+      // the FileMetadataLoader is reused, because a new FileMetadataStats 
will be
+      // created each time the file metadata of a partition gets loaded.
+      fileMetadataStats_ = fileMetadataStats;
+    }
+
     public HdfsFileFormat getFileFormat() {
       return fileFormatDescriptor_.getFileFormat();
     }
@@ -1733,13 +1756,25 @@ public class HdfsPartition extends CatalogObjectImpl
       }
 
       if (thriftPartition.isSetFile_desc()) {
-        setFileDescriptors(fdsFromThrift(thriftPartition.getFile_desc()));
+        List<FileDescriptor> fds = 
fdsFromThrift(thriftPartition.getFile_desc());
+        setFileDescriptors(fds);
+        for (FileDescriptor fd: fds) {
+          fileMetadataStats_.accumulate(fd);
+        }
       }
       if (thriftPartition.isSetInsert_file_desc()) {
-        
setInsertFileDescriptors(fdsFromThrift(thriftPartition.getInsert_file_desc()));
+        List<FileDescriptor> fds = 
fdsFromThrift(thriftPartition.getInsert_file_desc());
+        setInsertFileDescriptors(fds);
+        for (FileDescriptor fd: fds) {
+          fileMetadataStats_.accumulate(fd);
+        }
       }
       if (thriftPartition.isSetDelete_file_desc()) {
-        
setDeleteFileDescriptors(fdsFromThrift(thriftPartition.getDelete_file_desc()));
+        List<FileDescriptor> fds = 
fdsFromThrift(thriftPartition.getDelete_file_desc());
+        setDeleteFileDescriptors(fds);
+        for (FileDescriptor fd: fds) {
+          fileMetadataStats_.accumulate(fd);
+        }
       }
 
       accessLevel_ = thriftPartition.isSetAccess_level() ?
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java 
b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 40e0750bb..610fb241f 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -58,8 +58,10 @@ import org.apache.impala.analysis.NullLiteral;
 import org.apache.impala.analysis.NumericLiteral;
 import org.apache.impala.analysis.PartitionKeyValue;
 import org.apache.impala.catalog.events.MetastoreEventsProcessor;
+import org.apache.impala.catalog.CatalogObject.ThriftObjectType;
 import org.apache.impala.catalog.HdfsPartition.FileBlock;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
+import org.apache.impala.catalog.HdfsTable.FileMetadataStats;
 import org.apache.impala.catalog.iceberg.GroupedContentFiles;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
@@ -354,18 +356,11 @@ public class HdfsTable extends Table implements FeFsTable 
{
   // level.
   public final static class FileMetadataStats {
     // Number of files in a table/partition.
-    public long numFiles;
+    public long numFiles = 0;
     // Number of blocks in a table/partition.
-    public long numBlocks;
+    public long numBlocks = 0;
     // Total size (in bytes) of all files in a table/partition.
-    public long totalFileBytes;
-
-    // Unsets the storage stats to indicate that their values are unknown.
-    public void unset() {
-      numFiles = -1;
-      numBlocks = -1;
-      totalFileBytes = -1;
-    }
+    public long totalFileBytes = 0;
 
     // Initializes the values of the storage stats.
     public void init() {
@@ -379,6 +374,25 @@ public class HdfsTable extends Table implements FeFsTable {
       numBlocks = stats.numBlocks;
       totalFileBytes = stats.totalFileBytes;
     }
+
+    public void merge(FileMetadataStats other) {
+      numFiles += other.numFiles;
+      numBlocks += other.numBlocks;
+      totalFileBytes += other.totalFileBytes;
+    }
+
+    public void remove(FileMetadataStats other) {
+      numFiles -= other.numFiles;
+      numBlocks -= other.numBlocks;
+      totalFileBytes -= other.totalFileBytes;
+    }
+
+    // Accumulate the statistics of the fd into this FileMetadataStats.
+    public void accumulate(FileDescriptor fd) {
+      numBlocks += fd.getNumFileBlocks();
+      totalFileBytes += fd.getFileLength();
+      ++numFiles;
+    }
   }
 
   // Table level storage-related statistics. Depending on whether the table is 
stored in
@@ -425,21 +439,6 @@ public class HdfsTable extends Table implements FeFsTable {
     return true;
   }
 
-  /**
-   * Updates the storage stats of this table based on the partition 
information.
-   * This is used only for the frontend tests that do not spawn a separate 
Catalog
-   * instance.
-   */
-  public void computeHdfsStatsForTesting() {
-    Preconditions.checkState(fileMetadataStats_.numFiles == -1
-        && fileMetadataStats_.totalFileBytes == -1);
-    fileMetadataStats_.init();
-    for (HdfsPartition partition: partitionMap_.values()) {
-      fileMetadataStats_.numFiles += partition.getNumFileDescriptors();
-      fileMetadataStats_.totalFileBytes += partition.getSize();
-    }
-  }
-
   @Override
   public TCatalogObjectType getCatalogObjectType() {
     return TCatalogObjectType.TABLE;
@@ -1039,8 +1038,7 @@ public class HdfsTable extends Table implements FeFsTable 
{
     if (partitionMap_.containsKey(partition.getId())) return false;
     if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true;
     partitionMap_.put(partition.getId(), partition);
-    fileMetadataStats_.totalFileBytes += partition.getSize();
-    fileMetadataStats_.numFiles += partition.getNumFileDescriptors();
+    fileMetadataStats_.merge(partition.getFileMetadataStats());
     updatePartitionMdAndColStats(partition);
     lastCompactionId_ = Math.max(lastCompactionId_, 
partition.getLastCompactionId());
     return true;
@@ -1132,8 +1130,7 @@ public class HdfsTable extends Table implements FeFsTable 
{
   private HdfsPartition dropPartition(HdfsPartition partition,
       boolean removeCacheDirective) {
     if (partition == null) return null;
-    fileMetadataStats_.totalFileBytes -= partition.getSize();
-    fileMetadataStats_.numFiles -= partition.getNumFileDescriptors();
+    fileMetadataStats_.remove(partition.getFileMetadataStats());
     Preconditions.checkArgument(partition.getPartitionValues().size() ==
         numClusteringCols_);
     Long partitionId = partition.getId();
@@ -1218,6 +1215,25 @@ public class HdfsTable extends Table implements 
FeFsTable {
     prototypePartition_ = HdfsPartition.prototypePartition(this, 
hdfsStorageDescriptor);
   }
 
+  private void updateMetrics() {
+    long memUsageEstimate = 0;
+    int numPartitions = partitionMap_.values().size();
+    memUsageEstimate += numPartitions * PER_PARTITION_MEM_USAGE_BYTES;
+    FileMetadataStats newStats = new FileMetadataStats();
+    for (HdfsPartition partition: partitionMap_.values()) {
+      if (partition.hasIncrementalStats()) {
+        memUsageEstimate += getColumns().size() * STATS_SIZE_PER_COLUMN_BYTES;
+        hasIncrementalStats_ = true;
+      }
+      newStats.merge(partition.getFileMetadataStats());
+    }
+    fileMetadataStats_.set(newStats);
+    memUsageEstimate += fileMetadataStats_.numFiles * PER_FD_MEM_USAGE_BYTES +
+        fileMetadataStats_.numBlocks * PER_BLOCK_MEM_USAGE_BYTES;
+    setEstimatedMetadataSize(memUsageEstimate);
+    setNumFiles(fileMetadataStats_.numFiles);
+  }
+
   @Override
   public void load(boolean reuseMetadata, IMetaStoreClient client,
       org.apache.hadoop.hive.metastore.api.Table msTbl, String reason,
@@ -1349,7 +1365,7 @@ public class HdfsTable extends Table implements FeFsTable 
{
         if (loadParams.getLoadTableSchema()) {
           setAvroSchema(msClient, msTbl, catalogTimeline);
         }
-        fileMetadataStats_.unset();
+        updateMetrics();
         refreshLastUsedTime();
         // Make sure all the partition modifications are done.
         Preconditions.checkState(dirtyPartitions_.isEmpty());
@@ -2463,61 +2479,23 @@ public class HdfsTable extends Table implements 
FeFsTable {
    * partitions in the refPartitions set (the backend doesn't need metadata for
    * unreferenced partitions). In addition, metadata that is not used by the 
backend will
    * be omitted.
-   *
-   * To prevent the catalog from hitting an OOM error while trying to
-   * serialize large partition incremental stats, we estimate the stats size 
and filter
-   * the incremental stats data from partition objects if the estimate exceeds
-   * --inc_stats_size_limit_bytes. This function also collects storage related 
statistics
-   *  (e.g. number of blocks, files, etc) in order to compute an estimate of 
the metadata
-   *  size of this table.
    */
   public THdfsTable getTHdfsTable(ThriftObjectType type, Set<Long> 
refPartitions) {
     if (type == ThriftObjectType.FULL) {
       // "full" implies all partitions should be included.
       Preconditions.checkArgument(refPartitions == null);
     }
-    long memUsageEstimate = 0;
-    int numPartitions =
-        (refPartitions == null) ? partitionMap_.values().size() : 
refPartitions.size();
-    memUsageEstimate += numPartitions * PER_PARTITION_MEM_USAGE_BYTES;
-    FileMetadataStats stats = new FileMetadataStats();
     Map<Long, THdfsPartition> idToPartition = new HashMap<>();
     for (HdfsPartition partition: partitionMap_.values()) {
       long id = partition.getId();
       if (refPartitions == null || refPartitions.contains(id)) {
         THdfsPartition tHdfsPartition = FeCatalogUtils.fsPartitionToThrift(
             partition, type);
-        if (partition.hasIncrementalStats()) {
-          memUsageEstimate += getColumns().size() * 
STATS_SIZE_PER_COLUMN_BYTES;
-          hasIncrementalStats_ = true;
-        }
-        if (type == ThriftObjectType.FULL) {
-          Preconditions.checkState(tHdfsPartition.isSetNum_blocks() &&
-              tHdfsPartition.isSetTotal_file_size_bytes());
-          stats.numBlocks += tHdfsPartition.getNum_blocks();
-          stats.numFiles +=
-              tHdfsPartition.isSetFile_desc() ? 
tHdfsPartition.getFile_desc().size() : 0;
-          stats.numFiles += tHdfsPartition.isSetInsert_file_desc() ?
-              tHdfsPartition.getInsert_file_desc().size() : 0;
-          stats.numFiles += tHdfsPartition.isSetDelete_file_desc() ?
-              tHdfsPartition.getDelete_file_desc().size() : 0;
-          stats.totalFileBytes += tHdfsPartition.getTotal_file_size_bytes();
-        }
         idToPartition.put(id, tHdfsPartition);
       }
     }
-    if (type == ThriftObjectType.FULL) fileMetadataStats_.set(stats);
-
     THdfsPartition prototypePartition = FeCatalogUtils.fsPartitionToThrift(
         prototypePartition_, ThriftObjectType.DESCRIPTOR_ONLY);
-
-    memUsageEstimate += fileMetadataStats_.numFiles * PER_FD_MEM_USAGE_BYTES +
-        fileMetadataStats_.numBlocks * PER_BLOCK_MEM_USAGE_BYTES;
-    if (type == ThriftObjectType.FULL) {
-      // These metrics only make sense when we are collecting a FULL object.
-      setEstimatedMetadataSize(memUsageEstimate);
-      setNumFiles(fileMetadataStats_.numFiles);
-    }
     THdfsTable hdfsTable = new THdfsTable(hdfsBaseDir_, getColumnNames(),
         getNullPartitionKeyValue(), nullColumnValue_, idToPartition, 
prototypePartition);
     hdfsTable.setAvroSchema(avroSchema_);
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java
index b9ceff848..69ec01190 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.iceberg.ContentFile;
 import org.apache.impala.catalog.FeIcebergTable.Utils;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
+import org.apache.impala.catalog.HdfsTable.FileMetadataStats;
 import org.apache.impala.catalog.iceberg.GroupedContentFiles;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.Reference;
@@ -121,6 +122,7 @@ public class IcebergFileMetadataLoader extends 
FileMetadataLoader {
   private void loadInternal() throws CatalogException, IOException {
     loadedFds_ = new ArrayList<>();
     loadStats_ = new LoadStats(partDir_);
+    fileMetadataStats_ = new FileMetadataStats();
 
     // Process the existing Fd ContentFile and return the newly added 
ContentFile
     Iterable<ContentFile<?>> newContentFiles = loadContentFilesWithOldFds();
@@ -144,7 +146,9 @@ public class IcebergFileMetadataLoader extends 
FileMetadataLoader {
       if (FileSystemUtil.supportsStorageIds(fsForPath)) {
         filesSupportsStorageIds.add(Pair.create(fsForPath, contentFile));
       } else {
-        loadedFds_.add(createFd(fsForPath, contentFile, null, null));
+        FileDescriptor fd = createFd(fsForPath, contentFile, null, null);
+        loadedFds_.add(fd);
+        fileMetadataStats_.accumulate(fd);
         ++loadStats_.loadedFiles;
       }
     }
@@ -162,8 +166,10 @@ public class IcebergFileMetadataLoader extends 
FileMetadataLoader {
       Path path = FileSystemUtil.createFullyQualifiedPath(
           new Path(contentFileInfo.getSecond().path().toString()));
       FileStatus stat = nameToFileStatus.get(path);
-      loadedFds_.add(createFd(contentFileInfo.getFirst(), 
contentFileInfo.getSecond(),
-          stat, numUnknownDiskIds));
+      FileDescriptor fd = createFd(contentFileInfo.getFirst(),
+          contentFileInfo.getSecond(), stat, numUnknownDiskIds);
+      loadedFds_.add(fd);
+      fileMetadataStats_.accumulate(fd);
     }
     loadStats_.loadedFiles += filesSupportsStorageIds.size();
     loadStats_.unknownDiskIds += numUnknownDiskIds.getRef();
@@ -192,7 +198,8 @@ public class IcebergFileMetadataLoader extends 
FileMetadataLoader {
         newContentFiles.add(contentFile);
       } else {
         ++loadStats_.skippedFiles;
-        loadedFds_.add(Preconditions.checkNotNull(fd));
+        loadedFds_.add(fd);
+        fileMetadataStats_.accumulate(fd);
       }
     }
     return newContentFiles;
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java 
b/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java
index 4ee6beaed..7070884a8 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java
@@ -160,6 +160,7 @@ public class ParallelFileMetadataLoader {
         } else {
           partBuilder.setFileDescriptors(loader.getLoadedFds());
         }
+        partBuilder.setFileMetadataStats(loader.getFileMetadataStats());
       }
     }
   }
diff --git 
a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java 
b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
index 95c9a10ab..814e1cbc0 100644
--- a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
+++ b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
@@ -135,9 +135,6 @@ public class ImpaladTestCatalog extends ImpaladCatalog {
     }
     Preconditions.checkNotNull(newTbl);
     Preconditions.checkState(newTbl.isLoaded());
-    if (newTbl instanceof HdfsTable) {
-      ((HdfsTable) newTbl).computeHdfsStatsForTesting();
-    }
     db.addTable(newTbl);
     return newTbl;
   }
diff --git a/tests/custom_cluster/test_web_pages.py 
b/tests/custom_cluster/test_web_pages.py
index 00e6a04d9..adc8a74c4 100644
--- a/tests/custom_cluster/test_web_pages.py
+++ b/tests/custom_cluster/test_web_pages.py
@@ -32,6 +32,7 @@ from tests.common.skip import SkipIfFS
 from tests.shell.util import run_impala_shell_cmd
 
 SMALL_QUERY_LOG_SIZE_IN_BYTES = 40 * 1024
+CATALOG_URL = "http://localhost:25020/catalog"
 
 
 class TestWebPage(CustomClusterTestSuite):
@@ -505,3 +506,43 @@ class TestWebPage(CustomClusterTestSuite):
     self.execute_query("invalidate metadata")
     page = requests.get("http://localhost:25020/events").text
     assert "Unexpected exception" not in page, "Still see error message:\n" + 
page
+
+  def _test_catalog_tables_stats_after_describe(self, table_full_name, 
num_files):
+    """Test the lists of tables with Most Number of Files and Highest Memory 
Requirements
+    in the catalog page. Start a new cluster to make sure the table is not 
loaded before
+    DESCRIBE."""
+
+    def get_table_metric(content, list_name, key):
+      table_list = content[list_name]
+      for table in table_list:
+        if table["name"] == table_full_name:
+          return table[key]
+      return None
+
+    # The table is not in the lists after the cluster starts
+    content = self.get_debug_page(CATALOG_URL + "?json")
+    assert get_table_metric(content, "large_tables", "mem_estimate") is None
+    assert get_table_metric(content, "high_file_count_tables", "num_files") is 
None
+
+    self.client.execute("DESCRIBE {0}".format(table_full_name))
+    content = self.get_debug_page(CATALOG_URL + "?json")
+    assert get_table_metric(content, "large_tables", "mem_estimate") > 0
+    assert get_table_metric(content, "high_file_count_tables", "num_files") == 
num_files
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+    catalogd_args="--catalog_topic_mode=full",
+    impalad_args="--use_local_catalog=false")
+  def test_catalog_tables_stats_legacy_catalog(self):
+    self._test_catalog_tables_stats_after_describe("functional.alltypes", 24)
+    self._test_catalog_tables_stats_after_describe(
+        "functional_parquet.iceberg_lineitem_sixblocks", 4)
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+    catalogd_args="--catalog_topic_mode=minimal",
+    impalad_args="--use_local_catalog=true")
+  def test_catalog_tables_stats_local_catalog(self):
+    self._test_catalog_tables_stats_after_describe("functional.alltypes", 24)
+    self._test_catalog_tables_stats_after_describe(
+        "functional_parquet.iceberg_lineitem_sixblocks", 4)

Reply via email to