This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 731c16c73ab20cdd0d4a03e24a1a898b0489c7ed Author: Xuebin Su <[email protected]> AuthorDate: Thu Oct 31 17:28:02 2024 +0800 IMPALA-13154: Update metrics when loading an HDFS table Previously, some table metrics, such as the estimated memory usage and the number of files, were only updated when a "FULL" Thrift object of the table is requested. As a result, if a user ran a DESCRIBE command on a table, and then tried to find the table on the Top-N page of the web UI, the user would not find it. This patch fixes the issue by updating the table metrics as soon as an HDFS table is loaded. With this, no matter what Thrift object type of the table is requested, the metrics will always be updated and displayed on the web UI. Testing: - Added two custom cluster tests in test_web_pages.py to make sure that table stats can be viewed on the web UI after DESCRIBE, for both legacy and local catalog modes. Change-Id: I6e2eb503b0f61b1e6403058bc5dc78d721e7e940 Reviewed-on: http://gerrit.cloudera.org:8080/22014 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/catalog/FileMetadataLoader.java | 11 ++ .../org/apache/impala/catalog/HdfsPartition.java | 49 +++++++-- .../java/org/apache/impala/catalog/HdfsTable.java | 114 +++++++++------------ .../impala/catalog/IcebergFileMetadataLoader.java | 15 ++- .../impala/catalog/ParallelFileMetadataLoader.java | 1 + .../apache/impala/testutil/ImpaladTestCatalog.java | 3 - tests/custom_cluster/test_web_pages.py | 41 ++++++++ 7 files changed, 152 insertions(+), 82 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java b/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java index a5c44245e..b31053102 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java +++ b/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; +import org.apache.impala.catalog.HdfsTable.FileMetadataStats; import org.apache.impala.common.FileSystemUtil; import org.apache.impala.common.Reference; import org.apache.impala.thrift.TNetworkAddress; @@ -78,6 +79,10 @@ public class FileMetadataLoader { protected LoadStats loadStats_; protected String debugAction_; + // File statistics for the partition. It will be initialized each time loadInternal() + // gets called. + protected FileMetadataStats fileMetadataStats_ = null; + /** * @param partDir the dir for which to fetch file metadata * @param recursive whether to recursively list files @@ -177,6 +182,7 @@ public class FileMetadataLoader { private void loadInternal() throws CatalogException, IOException { Preconditions.checkState(loadStats_ == null, "already loaded"); loadStats_ = new LoadStats(partDir_); + fileMetadataStats_ = new FileMetadataStats(); FileSystem fs = partDir_.getFileSystem(CONF); // If we don't have any prior FDs from which we could re-use old block location info, @@ -223,6 +229,7 @@ public class FileMetadataLoader { FileDescriptor fd = getFileDescriptor(fs, listWithLocations, numUnknownDiskIds, fileStatus); loadedFds_.add(Preconditions.checkNotNull(fd)); + fileMetadataStats_.accumulate(fd); } if (writeIds_ != null) { loadedInsertDeltaFds_ = new ArrayList<>(); @@ -242,6 +249,10 @@ public class FileMetadataLoader { } } + public FileMetadataStats getFileMetadataStats() { + return fileMetadataStats_; + } + /** * Return fd created by the given fileStatus or from the cache(oldFdsByPath_). */ diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java index c8a8493f2..32276be55 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.PartitionKeyValue; +import org.apache.impala.catalog.HdfsTable.FileMetadataStats; import org.apache.impala.catalog.events.InFlightEvents; import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventPropertyKey; import org.apache.impala.common.FileSystemUtil; @@ -765,6 +766,9 @@ public class HdfsPartition extends CatalogObjectImpl // -1 means there is no previous refresh event happened private final long lastRefreshEventId_; + // File statistics corresponding to the encoded file descriptors. + private FileMetadataStats fileMetadataStats_ = null; + /** * Constructor. Needed for third party extensions that want to use their own builder * to construct the object. @@ -785,7 +789,7 @@ public class HdfsPartition extends CatalogObjectImpl isMarkedCached, accessLevel, hmsParameters, cachedMsPartitionDescriptor, partitionStats, hasIncrementalStats, numRows, writeId, inFlightEvents, /*createEventId=*/-1L, /*lastCompactionId*/-1L, - /*lastRefreshEventId*/-1L); + /*lastRefreshEventId*/-1L, /*fileMetadataStats*/null); } protected HdfsPartition(HdfsTable table, long id, long prevId, String partName, @@ -804,7 +808,7 @@ public class HdfsPartition extends CatalogObjectImpl isMarkedCached, accessLevel, hmsParameters, cachedMsPartitionDescriptor, partitionStats, hasIncrementalStats, numRows, writeId, inFlightEvents, /*createEventId=*/-1L, /*lastCompactionId*/-1L, - /*lastRefreshEventId*/-1L); + /*lastRefreshEventId*/-1L, /*fileMetadataStats*/null); } protected HdfsPartition(HdfsTable table, long id, long prevId, String partName, @@ -817,7 +821,7 @@ public class HdfsPartition extends CatalogObjectImpl CachedHmsPartitionDescriptor cachedMsPartitionDescriptor, byte[] partitionStats, boolean hasIncrementalStats, long numRows, long writeId, InFlightEvents inFlightEvents, long createEventId, long lastCompactionId, - long lastRefreshEventId) { + long lastRefreshEventId, FileMetadataStats fileMetadataStats) { table_ = table; id_ = id; prevId_ = prevId; @@ -844,6 +848,7 @@ public class HdfsPartition extends CatalogObjectImpl } else { partName_ = partName; } + fileMetadataStats_ = fileMetadataStats; } public long getCreateEventId() { return createEventId_; } @@ -1063,6 +1068,10 @@ public class HdfsPartition extends CatalogObjectImpl encodedDeleteFileDescriptors_.size(); } + public FileMetadataStats getFileMetadataStats() { + return Preconditions.checkNotNull(fileMetadataStats_); + } + @Override public boolean hasFileDescriptors() { return !encodedFileDescriptors_.isEmpty() || @@ -1168,6 +1177,9 @@ public class HdfsPartition extends CatalogObjectImpl @Override public long getSize() { + if (fileMetadataStats_ != null) { + return fileMetadataStats_.totalFileBytes; + } long result = 0; for (HdfsPartition.FileDescriptor fileDescriptor: getFileDescriptors()) { result += fileDescriptor.getFileLength(); @@ -1301,6 +1313,9 @@ public class HdfsPartition extends CatalogObjectImpl private long lastRefreshEventId_ = -1L; private InFlightEvents inFlightEvents_ = new InFlightEvents(); + // File statistics for the partition, initialized to all zeros. + private FileMetadataStats fileMetadataStats_ = new FileMetadataStats(); + @Nullable private HdfsPartition oldInstance_ = null; // True if we are generating a minimal partition instance for @@ -1369,7 +1384,7 @@ public class HdfsPartition extends CatalogObjectImpl encodedDeleteFileDescriptors_, location_, isMarkedCached_, accessLevel_, hmsParameters_, cachedMsPartitionDescriptor_, partitionStats_, hasIncrementalStats_, numRows_, writeId_, inFlightEvents_, createEventId_, - lastCompactionId_, lastRefreshEventId_); + lastCompactionId_, lastRefreshEventId_, fileMetadataStats_); } public Builder setId(long id) { @@ -1595,6 +1610,7 @@ public class HdfsPartition extends CatalogObjectImpl } public Builder setFileDescriptors(HdfsPartition partition) { + fileMetadataStats_.set(partition.getFileMetadataStats()); encodedFileDescriptors_ = partition.encodedFileDescriptors_; encodedInsertFileDescriptors_ = partition.encodedInsertFileDescriptors_; encodedDeleteFileDescriptors_ = partition.encodedDeleteFileDescriptors_; @@ -1622,6 +1638,13 @@ public class HdfsPartition extends CatalogObjectImpl return this; } + public void setFileMetadataStats(FileMetadataStats fileMetadataStats) { + // The fileMetadataStats will not be shared by more than one HdfsPartition even if + // the FileMetadataLoader is reused, because a new FileMetadataStats will be + // created each time the file metadata of a partition gets loaded. + fileMetadataStats_ = fileMetadataStats; + } + public HdfsFileFormat getFileFormat() { return fileFormatDescriptor_.getFileFormat(); } @@ -1733,13 +1756,25 @@ public class HdfsPartition extends CatalogObjectImpl } if (thriftPartition.isSetFile_desc()) { - setFileDescriptors(fdsFromThrift(thriftPartition.getFile_desc())); + List<FileDescriptor> fds = fdsFromThrift(thriftPartition.getFile_desc()); + setFileDescriptors(fds); + for (FileDescriptor fd: fds) { + fileMetadataStats_.accumulate(fd); + } } if (thriftPartition.isSetInsert_file_desc()) { - setInsertFileDescriptors(fdsFromThrift(thriftPartition.getInsert_file_desc())); + List<FileDescriptor> fds = fdsFromThrift(thriftPartition.getInsert_file_desc()); + setInsertFileDescriptors(fds); + for (FileDescriptor fd: fds) { + fileMetadataStats_.accumulate(fd); + } } if (thriftPartition.isSetDelete_file_desc()) { - setDeleteFileDescriptors(fdsFromThrift(thriftPartition.getDelete_file_desc())); + List<FileDescriptor> fds = fdsFromThrift(thriftPartition.getDelete_file_desc()); + setDeleteFileDescriptors(fds); + for (FileDescriptor fd: fds) { + fileMetadataStats_.accumulate(fd); + } } accessLevel_ = thriftPartition.isSetAccess_level() ? diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 40e0750bb..610fb241f 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -58,8 +58,10 @@ import org.apache.impala.analysis.NullLiteral; import org.apache.impala.analysis.NumericLiteral; import org.apache.impala.analysis.PartitionKeyValue; import org.apache.impala.catalog.events.MetastoreEventsProcessor; +import org.apache.impala.catalog.CatalogObject.ThriftObjectType; import org.apache.impala.catalog.HdfsPartition.FileBlock; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; +import org.apache.impala.catalog.HdfsTable.FileMetadataStats; import org.apache.impala.catalog.iceberg.GroupedContentFiles; import org.apache.impala.common.FileSystemUtil; import org.apache.impala.common.ImpalaException; @@ -354,18 +356,11 @@ public class HdfsTable extends Table implements FeFsTable { // level. public final static class FileMetadataStats { // Number of files in a table/partition. - public long numFiles; + public long numFiles = 0; // Number of blocks in a table/partition. - public long numBlocks; + public long numBlocks = 0; // Total size (in bytes) of all files in a table/partition. - public long totalFileBytes; - - // Unsets the storage stats to indicate that their values are unknown. - public void unset() { - numFiles = -1; - numBlocks = -1; - totalFileBytes = -1; - } + public long totalFileBytes = 0; // Initializes the values of the storage stats. public void init() { @@ -379,6 +374,25 @@ public class HdfsTable extends Table implements FeFsTable { numBlocks = stats.numBlocks; totalFileBytes = stats.totalFileBytes; } + + public void merge(FileMetadataStats other) { + numFiles += other.numFiles; + numBlocks += other.numBlocks; + totalFileBytes += other.totalFileBytes; + } + + public void remove(FileMetadataStats other) { + numFiles -= other.numFiles; + numBlocks -= other.numBlocks; + totalFileBytes -= other.totalFileBytes; + } + + // Accumulate the statistics of the fd into this FileMetadataStats. + public void accumulate(FileDescriptor fd) { + numBlocks += fd.getNumFileBlocks(); + totalFileBytes += fd.getFileLength(); + ++numFiles; + } } // Table level storage-related statistics. Depending on whether the table is stored in @@ -425,21 +439,6 @@ public class HdfsTable extends Table implements FeFsTable { return true; } - /** - * Updates the storage stats of this table based on the partition information. - * This is used only for the frontend tests that do not spawn a separate Catalog - * instance. - */ - public void computeHdfsStatsForTesting() { - Preconditions.checkState(fileMetadataStats_.numFiles == -1 - && fileMetadataStats_.totalFileBytes == -1); - fileMetadataStats_.init(); - for (HdfsPartition partition: partitionMap_.values()) { - fileMetadataStats_.numFiles += partition.getNumFileDescriptors(); - fileMetadataStats_.totalFileBytes += partition.getSize(); - } - } - @Override public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.TABLE; @@ -1039,8 +1038,7 @@ public class HdfsTable extends Table implements FeFsTable { if (partitionMap_.containsKey(partition.getId())) return false; if (partition.getFileFormat() == HdfsFileFormat.AVRO) hasAvroData_ = true; partitionMap_.put(partition.getId(), partition); - fileMetadataStats_.totalFileBytes += partition.getSize(); - fileMetadataStats_.numFiles += partition.getNumFileDescriptors(); + fileMetadataStats_.merge(partition.getFileMetadataStats()); updatePartitionMdAndColStats(partition); lastCompactionId_ = Math.max(lastCompactionId_, partition.getLastCompactionId()); return true; @@ -1132,8 +1130,7 @@ public class HdfsTable extends Table implements FeFsTable { private HdfsPartition dropPartition(HdfsPartition partition, boolean removeCacheDirective) { if (partition == null) return null; - fileMetadataStats_.totalFileBytes -= partition.getSize(); - fileMetadataStats_.numFiles -= partition.getNumFileDescriptors(); + fileMetadataStats_.remove(partition.getFileMetadataStats()); Preconditions.checkArgument(partition.getPartitionValues().size() == numClusteringCols_); Long partitionId = partition.getId(); @@ -1218,6 +1215,25 @@ public class HdfsTable extends Table implements FeFsTable { prototypePartition_ = HdfsPartition.prototypePartition(this, hdfsStorageDescriptor); } + private void updateMetrics() { + long memUsageEstimate = 0; + int numPartitions = partitionMap_.values().size(); + memUsageEstimate += numPartitions * PER_PARTITION_MEM_USAGE_BYTES; + FileMetadataStats newStats = new FileMetadataStats(); + for (HdfsPartition partition: partitionMap_.values()) { + if (partition.hasIncrementalStats()) { + memUsageEstimate += getColumns().size() * STATS_SIZE_PER_COLUMN_BYTES; + hasIncrementalStats_ = true; + } + newStats.merge(partition.getFileMetadataStats()); + } + fileMetadataStats_.set(newStats); + memUsageEstimate += fileMetadataStats_.numFiles * PER_FD_MEM_USAGE_BYTES + + fileMetadataStats_.numBlocks * PER_BLOCK_MEM_USAGE_BYTES; + setEstimatedMetadataSize(memUsageEstimate); + setNumFiles(fileMetadataStats_.numFiles); + } + @Override public void load(boolean reuseMetadata, IMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl, String reason, @@ -1349,7 +1365,7 @@ public class HdfsTable extends Table implements FeFsTable { if (loadParams.getLoadTableSchema()) { setAvroSchema(msClient, msTbl, catalogTimeline); } - fileMetadataStats_.unset(); + updateMetrics(); refreshLastUsedTime(); // Make sure all the partition modifications are done. Preconditions.checkState(dirtyPartitions_.isEmpty()); @@ -2463,61 +2479,23 @@ public class HdfsTable extends Table implements FeFsTable { * partitions in the refPartitions set (the backend doesn't need metadata for * unreferenced partitions). In addition, metadata that is not used by the backend will * be omitted. - * - * To prevent the catalog from hitting an OOM error while trying to - * serialize large partition incremental stats, we estimate the stats size and filter - * the incremental stats data from partition objects if the estimate exceeds - * --inc_stats_size_limit_bytes. This function also collects storage related statistics - * (e.g. number of blocks, files, etc) in order to compute an estimate of the metadata - * size of this table. */ public THdfsTable getTHdfsTable(ThriftObjectType type, Set<Long> refPartitions) { if (type == ThriftObjectType.FULL) { // "full" implies all partitions should be included. Preconditions.checkArgument(refPartitions == null); } - long memUsageEstimate = 0; - int numPartitions = - (refPartitions == null) ? partitionMap_.values().size() : refPartitions.size(); - memUsageEstimate += numPartitions * PER_PARTITION_MEM_USAGE_BYTES; - FileMetadataStats stats = new FileMetadataStats(); Map<Long, THdfsPartition> idToPartition = new HashMap<>(); for (HdfsPartition partition: partitionMap_.values()) { long id = partition.getId(); if (refPartitions == null || refPartitions.contains(id)) { THdfsPartition tHdfsPartition = FeCatalogUtils.fsPartitionToThrift( partition, type); - if (partition.hasIncrementalStats()) { - memUsageEstimate += getColumns().size() * STATS_SIZE_PER_COLUMN_BYTES; - hasIncrementalStats_ = true; - } - if (type == ThriftObjectType.FULL) { - Preconditions.checkState(tHdfsPartition.isSetNum_blocks() && - tHdfsPartition.isSetTotal_file_size_bytes()); - stats.numBlocks += tHdfsPartition.getNum_blocks(); - stats.numFiles += - tHdfsPartition.isSetFile_desc() ? tHdfsPartition.getFile_desc().size() : 0; - stats.numFiles += tHdfsPartition.isSetInsert_file_desc() ? - tHdfsPartition.getInsert_file_desc().size() : 0; - stats.numFiles += tHdfsPartition.isSetDelete_file_desc() ? - tHdfsPartition.getDelete_file_desc().size() : 0; - stats.totalFileBytes += tHdfsPartition.getTotal_file_size_bytes(); - } idToPartition.put(id, tHdfsPartition); } } - if (type == ThriftObjectType.FULL) fileMetadataStats_.set(stats); - THdfsPartition prototypePartition = FeCatalogUtils.fsPartitionToThrift( prototypePartition_, ThriftObjectType.DESCRIPTOR_ONLY); - - memUsageEstimate += fileMetadataStats_.numFiles * PER_FD_MEM_USAGE_BYTES + - fileMetadataStats_.numBlocks * PER_BLOCK_MEM_USAGE_BYTES; - if (type == ThriftObjectType.FULL) { - // These metrics only make sense when we are collecting a FULL object. - setEstimatedMetadataSize(memUsageEstimate); - setNumFiles(fileMetadataStats_.numFiles); - } THdfsTable hdfsTable = new THdfsTable(hdfsBaseDir_, getColumnNames(), getNullPartitionKeyValue(), nullColumnValue_, idToPartition, prototypePartition); hdfsTable.setAvroSchema(avroSchema_); diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java b/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java index b9ceff848..69ec01190 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergFileMetadataLoader.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.iceberg.ContentFile; import org.apache.impala.catalog.FeIcebergTable.Utils; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; +import org.apache.impala.catalog.HdfsTable.FileMetadataStats; import org.apache.impala.catalog.iceberg.GroupedContentFiles; import org.apache.impala.common.FileSystemUtil; import org.apache.impala.common.Reference; @@ -121,6 +122,7 @@ public class IcebergFileMetadataLoader extends FileMetadataLoader { private void loadInternal() throws CatalogException, IOException { loadedFds_ = new ArrayList<>(); loadStats_ = new LoadStats(partDir_); + fileMetadataStats_ = new FileMetadataStats(); // Process the existing Fd ContentFile and return the newly added ContentFile Iterable<ContentFile<?>> newContentFiles = loadContentFilesWithOldFds(); @@ -144,7 +146,9 @@ public class IcebergFileMetadataLoader extends FileMetadataLoader { if (FileSystemUtil.supportsStorageIds(fsForPath)) { filesSupportsStorageIds.add(Pair.create(fsForPath, contentFile)); } else { - loadedFds_.add(createFd(fsForPath, contentFile, null, null)); + FileDescriptor fd = createFd(fsForPath, contentFile, null, null); + loadedFds_.add(fd); + fileMetadataStats_.accumulate(fd); ++loadStats_.loadedFiles; } } @@ -162,8 +166,10 @@ public class IcebergFileMetadataLoader extends FileMetadataLoader { Path path = FileSystemUtil.createFullyQualifiedPath( new Path(contentFileInfo.getSecond().path().toString())); FileStatus stat = nameToFileStatus.get(path); - loadedFds_.add(createFd(contentFileInfo.getFirst(), contentFileInfo.getSecond(), - stat, numUnknownDiskIds)); + FileDescriptor fd = createFd(contentFileInfo.getFirst(), + contentFileInfo.getSecond(), stat, numUnknownDiskIds); + loadedFds_.add(fd); + fileMetadataStats_.accumulate(fd); } loadStats_.loadedFiles += filesSupportsStorageIds.size(); loadStats_.unknownDiskIds += numUnknownDiskIds.getRef(); @@ -192,7 +198,8 @@ public class IcebergFileMetadataLoader extends FileMetadataLoader { newContentFiles.add(contentFile); } else { ++loadStats_.skippedFiles; - loadedFds_.add(Preconditions.checkNotNull(fd)); + loadedFds_.add(fd); + fileMetadataStats_.accumulate(fd); } } return newContentFiles; diff --git a/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java b/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java index 4ee6beaed..7070884a8 100644 --- a/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java +++ b/fe/src/main/java/org/apache/impala/catalog/ParallelFileMetadataLoader.java @@ -160,6 +160,7 @@ public class ParallelFileMetadataLoader { } else { partBuilder.setFileDescriptors(loader.getLoadedFds()); } + partBuilder.setFileMetadataStats(loader.getFileMetadataStats()); } } } diff --git a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java index 95c9a10ab..814e1cbc0 100644 --- a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java +++ b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java @@ -135,9 +135,6 @@ public class ImpaladTestCatalog extends ImpaladCatalog { } Preconditions.checkNotNull(newTbl); Preconditions.checkState(newTbl.isLoaded()); - if (newTbl instanceof HdfsTable) { - ((HdfsTable) newTbl).computeHdfsStatsForTesting(); - } db.addTable(newTbl); return newTbl; } diff --git a/tests/custom_cluster/test_web_pages.py b/tests/custom_cluster/test_web_pages.py index 00e6a04d9..adc8a74c4 100644 --- a/tests/custom_cluster/test_web_pages.py +++ b/tests/custom_cluster/test_web_pages.py @@ -32,6 +32,7 @@ from tests.common.skip import SkipIfFS from tests.shell.util import run_impala_shell_cmd SMALL_QUERY_LOG_SIZE_IN_BYTES = 40 * 1024 +CATALOG_URL = "http://localhost:25020/catalog" class TestWebPage(CustomClusterTestSuite): @@ -505,3 +506,43 @@ class TestWebPage(CustomClusterTestSuite): self.execute_query("invalidate metadata") page = requests.get("http://localhost:25020/events").text assert "Unexpected exception" not in page, "Still see error message:\n" + page + + def _test_catalog_tables_stats_after_describe(self, table_full_name, num_files): + """Test the lists of tables with Most Number of Files and Highest Memory Requirements + in the catalog page. Start a new cluster to make sure the table is not loaded before + DESCRIBE.""" + + def get_table_metric(content, list_name, key): + table_list = content[list_name] + for table in table_list: + if table["name"] == table_full_name: + return table[key] + return None + + # The table is not in the lists after the cluster starts + content = self.get_debug_page(CATALOG_URL + "?json") + assert get_table_metric(content, "large_tables", "mem_estimate") is None + assert get_table_metric(content, "high_file_count_tables", "num_files") is None + + self.client.execute("DESCRIBE {0}".format(table_full_name)) + content = self.get_debug_page(CATALOG_URL + "?json") + assert get_table_metric(content, "large_tables", "mem_estimate") > 0 + assert get_table_metric(content, "high_file_count_tables", "num_files") == num_files + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args( + catalogd_args="--catalog_topic_mode=full", + impalad_args="--use_local_catalog=false") + def test_catalog_tables_stats_legacy_catalog(self): + self._test_catalog_tables_stats_after_describe("functional.alltypes", 24) + self._test_catalog_tables_stats_after_describe( + "functional_parquet.iceberg_lineitem_sixblocks", 4) + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args( + catalogd_args="--catalog_topic_mode=minimal", + impalad_args="--use_local_catalog=true") + def test_catalog_tables_stats_local_catalog(self): + self._test_catalog_tables_stats_after_describe("functional.alltypes", 24) + self._test_catalog_tables_stats_after_describe( + "functional_parquet.iceberg_lineitem_sixblocks", 4)
