This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 6ef34557861903f8b6356b0abf2e57326dcaa095
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Feb 29 11:22:58 2024 +0800

    [fix](statistics)Fix hms external table get row count bug while analyze (#31557)

    asdasd
---
 .../doris/statistics/ExternalAnalysisTask.java | 6 ++---
 .../apache/doris/statistics/HMSAnalysisTask.java | 30 ++++++++++++----------
 .../doris/statistics/HMSAnalysisTaskTest.java | 4 +--
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
index 6dca6ee5ad3..ef1b795bd13 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
@@ -59,7 +59,7 @@ public class ExternalAnalysisTask extends BaseAnalysisTask {
         if (isTableLevelTask) {
             getTableStats();
         } else {
-            getOrdinaryColumnStats();
+            getColumnStats();
         }
     }

@@ -83,8 +83,8 @@ public class ExternalAnalysisTask extends BaseAnalysisTask {
         job.rowCountDone(this);
     }

-    // Get ordinary column stats
-    protected void getOrdinaryColumnStats() throws Exception {
+    // Get column stats
+    protected void getColumnStats() throws Exception {
         StringBuilder sb = new StringBuilder();
         Map<String, String> params = buildStatsParams("NULL");
         params.put("min", getMinFunction());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 597acfdfddb..7235e795833 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -17,7 +17,6 @@

 package org.apache.doris.statistics;

-import org.apache.doris.catalog.Env;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.datasource.ExternalTable;
 import org.apache.doris.datasource.hive.HMSExternalTable;
@@ -59,8 +58,10 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {

     @Override
-    protected void getOrdinaryColumnStats() throws Exception {
-        if (!info.usingSqlForPartitionColumn) {
+    protected void getColumnStats() throws Exception {
+        if (info.usingSqlForPartitionColumn) {
+            super.getColumnStats();
+        } else {
             try {
                 if (isPartitionColumn()) {
                     getPartitionColumnStats();
@@ -72,10 +73,8 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
                         + "fallback to normal collection",
                         isPartitionColumn() ? "partition " : "", col.getName(), e);
                 /* retry using sql way! */
-                super.getOrdinaryColumnStats();
+                super.getColumnStats();
             }
-        } else {
-            super.getOrdinaryColumnStats();
         }
     }

@@ -107,10 +106,11 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
                 }
             }
         }
-        // Estimate the row count. This value is inaccurate if the table stats is empty.
-        TableStatsMeta tableStatsStatus = Env.getCurrentEnv().getAnalysisManager()
-                .findTableStatsStatus(hmsExternalTable.getId());
-        long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() : tableStatsStatus.rowCount;
+        // getRowCount may return 0 if cache is empty, in this case, call fetchRowCount.
+        long count = hmsExternalTable.getRowCount();
+        if (count == 0) {
+            count = hmsExternalTable.fetchRowCount();
+        }
         dataSize = dataSize * count / partitionNames.size();
         numNulls = numNulls * count / partitionNames.size();
         int ndv = ndvPartValues.size();
@@ -129,9 +129,11 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {

     // Collect the spark analyzed column stats through HMS metadata.
     private void getHmsColumnStats() throws Exception {
-        TableStatsMeta tableStatsStatus = Env.getCurrentEnv().getAnalysisManager()
-                .findTableStatsStatus(hmsExternalTable.getId());
-        long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() : tableStatsStatus.rowCount;
+        // getRowCount may return 0 if cache is empty, in this case, call fetchRowCount.
+        long count = hmsExternalTable.getRowCount();
+        if (count == 0) {
+            count = hmsExternalTable.fetchRowCount();
+        }

         Map<String, String> params = buildStatsParams("NULL");
         Map<StatsType, String> statsParams = new HashMap<>();
@@ -141,7 +143,7 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
         statsParams.put(StatsType.MAX_VALUE, "max");
         statsParams.put(StatsType.AVG_SIZE, "avg_len");

-        if (hmsExternalTable.fillColumnStatistics(info.colName, statsParams, params)) {
+        if (!hmsExternalTable.fillColumnStatistics(info.colName, statsParams, params)) {
             throw new AnalysisException("some column stats not available");
         }

diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index b98338eb394..96a077c8056 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -252,7 +252,7 @@ public class HMSAnalysisTaskTest {
         analysisInfoBuilder.setUsingSqlForPartitionColumn(true);
         task.info = analysisInfoBuilder.build();

-        task.getOrdinaryColumnStats();
+        task.getColumnStats();
     }


@@ -309,6 +309,6 @@ public class HMSAnalysisTaskTest {
         analysisInfoBuilder.setUsingSqlForPartitionColumn(false);
         task.info = analysisInfoBuilder.build();

-        task.getOrdinaryColumnStats();
+        task.getColumnStats();
     }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
