This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6ef34557861903f8b6356b0abf2e57326dcaa095
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Feb 29 11:22:58 2024 +0800

    [fix](statistics)Fix hms external table get row count bug while analyze (#31557)
    
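    Read the row count for HMS external tables directly from the table
    (getRowCount, falling back to fetchRowCount when the cached value is 0)
    instead of from the analysis manager's table stats. Also rename
    getOrdinaryColumnStats to getColumnStats, and throw in getHmsColumnStats
    only when fillColumnStatistics returns false.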
---
 .../doris/statistics/ExternalAnalysisTask.java     |  6 ++---
 .../apache/doris/statistics/HMSAnalysisTask.java   | 30 ++++++++++++----------
 .../doris/statistics/HMSAnalysisTaskTest.java      |  4 +--
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
index 6dca6ee5ad3..ef1b795bd13 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
@@ -59,7 +59,7 @@ public class ExternalAnalysisTask extends BaseAnalysisTask {
         if (isTableLevelTask) {
             getTableStats();
         } else {
-            getOrdinaryColumnStats();
+            getColumnStats();
         }
     }
 
@@ -83,8 +83,8 @@ public class ExternalAnalysisTask extends BaseAnalysisTask {
         job.rowCountDone(this);
     }
 
-    // Get ordinary column stats
-    protected void getOrdinaryColumnStats() throws Exception {
+    // Get column stats
+    protected void getColumnStats() throws Exception {
         StringBuilder sb = new StringBuilder();
         Map<String, String> params = buildStatsParams("NULL");
         params.put("min", getMinFunction());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 597acfdfddb..7235e795833 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.statistics;
 
-import org.apache.doris.catalog.Env;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.datasource.ExternalTable;
 import org.apache.doris.datasource.hive.HMSExternalTable;
@@ -59,8 +58,10 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
 
 
     @Override
-    protected void getOrdinaryColumnStats() throws Exception {
-        if (!info.usingSqlForPartitionColumn) {
+    protected void getColumnStats() throws Exception {
+        if (info.usingSqlForPartitionColumn) {
+            super.getColumnStats();
+        } else {
             try {
                 if (isPartitionColumn()) {
                     getPartitionColumnStats();
@@ -72,10 +73,8 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
                         + "fallback to normal collection",
                         isPartitionColumn() ? "partition " : "", 
col.getName(), e);
                 /* retry using sql way! */
-                super.getOrdinaryColumnStats();
+                super.getColumnStats();
             }
-        } else {
-            super.getOrdinaryColumnStats();
         }
     }
 
@@ -107,10 +106,11 @@ public class HMSAnalysisTask extends ExternalAnalysisTask 
{
                 }
             }
         }
-        // Estimate the row count. This value is inaccurate if the table stats 
is empty.
-        TableStatsMeta tableStatsStatus = 
Env.getCurrentEnv().getAnalysisManager()
-                .findTableStatsStatus(hmsExternalTable.getId());
-        long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() 
: tableStatsStatus.rowCount;
+        // getRowCount may return 0 if the cache is empty; in that case, call fetchRowCount.
+        long count = hmsExternalTable.getRowCount();
+        if (count == 0) {
+            count = hmsExternalTable.fetchRowCount();
+        }
         dataSize = dataSize * count / partitionNames.size();
         numNulls = numNulls * count / partitionNames.size();
         int ndv = ndvPartValues.size();
@@ -129,9 +129,11 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
 
     // Collect the spark analyzed column stats through HMS metadata.
     private void getHmsColumnStats() throws Exception {
-        TableStatsMeta tableStatsStatus = 
Env.getCurrentEnv().getAnalysisManager()
-                .findTableStatsStatus(hmsExternalTable.getId());
-        long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() 
: tableStatsStatus.rowCount;
+        // getRowCount may return 0 if the cache is empty; in that case, call fetchRowCount.
+        long count = hmsExternalTable.getRowCount();
+        if (count == 0) {
+            count = hmsExternalTable.fetchRowCount();
+        }
 
         Map<String, String> params = buildStatsParams("NULL");
         Map<StatsType, String> statsParams = new HashMap<>();
@@ -141,7 +143,7 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
         statsParams.put(StatsType.MAX_VALUE, "max");
         statsParams.put(StatsType.AVG_SIZE, "avg_len");
 
-        if (hmsExternalTable.fillColumnStatistics(info.colName, statsParams, 
params)) {
+        if (!hmsExternalTable.fillColumnStatistics(info.colName, statsParams, 
params)) {
             throw new AnalysisException("some column stats not available");
         }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index b98338eb394..96a077c8056 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -252,7 +252,7 @@ public class HMSAnalysisTaskTest {
         analysisInfoBuilder.setUsingSqlForPartitionColumn(true);
         task.info = analysisInfoBuilder.build();
 
-        task.getOrdinaryColumnStats();
+        task.getColumnStats();
     }
 
 
@@ -309,6 +309,6 @@ public class HMSAnalysisTaskTest {
         analysisInfoBuilder.setUsingSqlForPartitionColumn(false);
         task.info = analysisInfoBuilder.build();
 
-        task.getOrdinaryColumnStats();
+        task.getColumnStats();
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to