This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new dd03546959f [improvement](statistics)Add log for estimating hive table row count logic. (#42921)
dd03546959f is described below

commit dd03546959f0d3bb36887a92dc71229940a7417f
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Oct 31 10:36:11 2024 +0800

    [improvement](statistics)Add log for estimating hive table row count logic. (#42921)
    
    Add log for estimating hive table row count logic to help investigate
    user issues.
---
 .../doris/datasource/ExternalRowCountCache.java    |  1 +
 .../doris/datasource/hive/HMSExternalTable.java    | 28 ++++++++++++++++++----
 .../doris/datasource/iceberg/IcebergUtils.java     |  5 +++-
 .../doris/statistics/util/StatisticsUtil.java      |  7 +++++-
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
index 4602c594571..075091e682d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
@@ -113,6 +113,7 @@ public class ExternalRowCountCache {
             if (f.isDone()) {
                 return f.get().orElse(-1L);
             }
+            LOG.info("Row count for table {}.{}.{} is still processing.", 
catalogId, dbId, tableId);
         } catch (Exception e) {
             LOG.warn("Unexpected exception while returning row count", e);
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 516448bdfbb..f72421da8a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -525,7 +525,7 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         long rowCount = getRowCountFromExternalSource();
         // Only hive table supports estimate row count by listing file.
         if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) {
-            LOG.debug("Will estimate row count from file list.");
+            LOG.info("Will estimate row count for table {} from file list.", 
name);
             rowCount = getRowCountFromFileList();
         }
         return rowCount;
@@ -837,14 +837,16 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
             return -1;
         }
         if (isView()) {
+            LOG.info("Table {} is view, return 0.", name);
             return 0;
         }
         HiveMetaStoreCache.HivePartitionValues partitionValues = 
getAllPartitionValues();
 
         // Get files for all partitions.
         int samplePartitionSize = Config.hive_stats_partition_sample_size;
-        List<HiveMetaStoreCache.FileCacheValue> filesByPartitions = 
getFilesForPartitions(partitionValues,
-                samplePartitionSize);
+        List<HiveMetaStoreCache.FileCacheValue> filesByPartitions =
+                getFilesForPartitions(partitionValues, samplePartitionSize);
+        LOG.info("Number of files selected for hive table {} is {}", name, 
filesByPartitions.size());
         long totalSize = 0;
         // Calculate the total file size.
         for (HiveMetaStoreCache.FileCacheValue files : filesByPartitions) {
@@ -863,14 +865,20 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
             estimatedRowSize += column.getDataType().getSlotSize();
         }
         if (estimatedRowSize == 0) {
+            LOG.warn("Table {} estimated size is 0, return 0.", name);
             return 0;
         }
 
         int totalPartitionSize = partitionValues == null ? 1 : 
partitionValues.getIdToPartitionItem().size();
         if (samplePartitionSize != 0 && samplePartitionSize < 
totalPartitionSize) {
+            LOG.info("Table {} sampled {} of {} partitions, sampled size is 
{}",
+                    name, samplePartitionSize, totalPartitionSize, totalSize);
             totalSize = totalSize * totalPartitionSize / samplePartitionSize;
         }
-        return totalSize / estimatedRowSize;
+        long rows = totalSize / estimatedRowSize;
+        LOG.info("Table {} rows {}, total size is {}, estimatedRowSize is {}",
+                name, rows, totalSize, estimatedRowSize);
+        return rows;
     }
 
     // Get all partition values from cache.
@@ -888,6 +896,12 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
             // no need to worry that this call will invalid or refresh the 
cache.
             // because it has enough space to keep partition info of all 
tables in cache.
             partitionValues = cache.getPartitionValues(dbName, name, 
partitionColumnTypes);
+            if (partitionValues == null || 
partitionValues.getPartitionNameToIdMap() == null) {
+                LOG.warn("Partition values for hive table {} is null", name);
+            } else {
+                LOG.info("Partition values size for hive table {} is {}",
+                        name, 
partitionValues.getPartitionNameToIdMap().size());
+            }
         }
         return partitionValues;
     }
@@ -923,6 +937,7 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
             // get partitions without cache, so that it will not invalid the 
cache when executing
             // non query request such as `show table status`
             hivePartitions = cache.getAllPartitionsWithoutCache(dbName, name, 
partitionValuesList);
+            LOG.info("Partition list size for hive partition table {} is {}", 
name, hivePartitions.size());
         } else {
             hivePartitions.add(new HivePartition(dbName, name, true,
                     getRemoteTable().getSd().getInputFormat(),
@@ -930,6 +945,11 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         }
         // Get files for all partitions.
         String bindBrokerName = catalog.bindBrokerName();
+        if (LOG.isDebugEnabled()) {
+            for (HivePartition partition : hivePartitions) {
+                LOG.debug("Chosen partition for table {}. [{}]", name, 
partition.toString());
+            }
+        }
         return cache.getFilesByPartitionsWithoutCache(hivePartitions, 
bindBrokerName);
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 893ee7bc93b..7ae600756f1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -604,11 +604,14 @@ public class IcebergUtils {
                 .getIcebergTable(catalog, dbName, tbName);
         Snapshot snapshot = icebergTable.currentSnapshot();
         if (snapshot == null) {
+            LOG.info("Iceberg table {}.{}.{} is empty, return row count 0.", 
catalog.getName(), dbName, tbName);
             // empty table
             return 0;
         }
         Map<String, String> summary = snapshot.summary();
-        return Long.parseLong(summary.get(TOTAL_RECORDS)) - 
Long.parseLong(summary.get(TOTAL_POSITION_DELETES));
+        long rows = Long.parseLong(summary.get(TOTAL_RECORDS)) - 
Long.parseLong(summary.get(TOTAL_POSITION_DELETES));
+        LOG.info("Iceberg table {}.{}.{} row count in summary is {}", 
catalog.getName(), dbName, tbName, rows);
+        return rows;
     }
 
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index d51281eb0e6..b0fc3b9c1cf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -626,6 +626,7 @@ public class StatisticsUtil {
             long rows = Long.parseLong(parameters.get(NUM_ROWS));
             // Sometimes, the NUM_ROWS in hms is 0 but actually is not. Need 
to check TOTAL_SIZE if NUM_ROWS is 0.
             if (rows != 0) {
+                LOG.info("Get row count {} for hive table {} in table 
parameters.", rows, table.getName());
                 return rows;
             }
         }
@@ -639,9 +640,13 @@ public class StatisticsUtil {
             estimatedRowSize += column.getDataType().getSlotSize();
         }
         if (estimatedRowSize == 0) {
+            LOG.warn("Hive table {} estimated row size is invalid {}", 
table.getName(), estimatedRowSize);
             return -1;
         }
-        return totalSize / estimatedRowSize;
+        long rows = totalSize / estimatedRowSize;
+        LOG.info("Get row count {} for hive table {} by total size {} and row 
size {}",
+                rows, table.getName(), totalSize, estimatedRowSize);
+        return rows;
     }
 
     /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to