This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit cc11695402db871baba9aaed514cb37e490b236d
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Feb 8 16:51:30 2023 +0800

    [Fix](multi catalog)(planner) Fix external table statistic collection bug (#16486)
    
    Add the index id to the column statistic id. Refresh the statistics cache after the analysis task finishes.
---
 .../doris/catalog/external/HMSExternalTable.java    |  1 +
 .../java/org/apache/doris/qe/SessionVariable.java   |  2 ++
 .../apache/doris/statistics/HiveAnalysisTask.java   | 21 +++++++++++++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 54a3c010dc..79df8bd6bf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -292,6 +292,7 @@ public class HMSExternalTable extends ExternalTable {
 
     @Override
     public List<Column> initSchema() {
+        makeSureInitialized();
         List<Column> columns;
         List<FieldSchema> schema = ((HMSExternalCatalog) 
catalog).getClient().getSchema(dbName, name);
         if (dlaType.equals(DLAType.ICEBERG)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 53b9e5f534..428eca11ae 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -27,6 +27,7 @@ import org.apache.doris.thrift.TResourceLimit;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.json.simple.JSONObject;
@@ -38,6 +39,7 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.lang.reflect.Field;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
index 4ad00bdc75..381f40f8ac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.catalog.Env;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.datasource.HMSExternalCatalog;
 import org.apache.doris.qe.AutoCloseConnectContext;
@@ -50,6 +51,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
     public static final String NUM_ROWS = "numRows";
     public static final String NUM_FILES = "numFiles";
     public static final String TIMESTAMP = "transient_lastDdlTime";
+    public static final String DELIMITER = "-";
 
     public HiveAnalysisTask(AnalysisTaskScheduler analysisTaskScheduler, 
AnalysisTaskInfo info) {
         super(analysisTaskScheduler, info);
@@ -81,7 +83,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
         Map<String, String> parameters = 
table.getRemoteTable().getParameters();
         // Collect table level row count, null number and timestamp.
         setParameterData(parameters, params);
-        params.put("id", String.valueOf(tbl.getId()) + "-" + 
String.valueOf(col.getName()));
+        params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), 
null));
         List<ColumnStatisticsObj> tableStats = 
table.getHiveTableColumnStats(columns);
         // Collect table level ndv, nulls, min and max. tableStats contains at 
most 1 item;
         for (ColumnStatisticsObj tableStat : tableStats) {
@@ -113,7 +115,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
             parameters = partition.getParameters();
             // Collect row count, null number and timestamp.
             setParameterData(parameters, params);
-            params.put("id", String.valueOf(tbl.getId()) + "-" + 
String.valueOf(col.getName()) + "-" + partName);
+            params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), 
partName));
             params.put("partId", partName);
             List<ColumnStatisticsObj> value = entry.getValue();
             Preconditions.checkState(value.size() == 1);
@@ -134,6 +136,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
                 this.stmtExecutor.execute();
             }
         }
+        Env.getCurrentEnv().getStatisticsCache().refreshSync(tbl.getId(), -1, 
col.getName());
     }
 
     private void getStatData(ColumnStatisticsData data, Map<String, String> 
params) {
@@ -201,4 +204,18 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
         params.put("update_time", sdf.format(new Date(timestamp * 1000)));
     }
+
+    private String genColumnStatId(long tableId, long indexId, String 
columnName, String partitionName) {
+        StringBuilder sb = new StringBuilder();
+        sb.append(tableId);
+        sb.append(DELIMITER);
+        sb.append(indexId);
+        sb.append(DELIMITER);
+        sb.append(columnName);
+        if (partitionName != null) {
+            sb.append(DELIMITER);
+            sb.append(partitionName);
+        }
+        return sb.toString();
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to