This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit cc11695402db871baba9aaed514cb37e490b236d Author: Jibing-Li <[email protected]> AuthorDate: Wed Feb 8 16:51:30 2023 +0800 [Fix](multi catalog)(planner) Fix external table statistic collection bug (#16486) Add index id to column statistic id. Refresh statistic cache after analyze. --- .../doris/catalog/external/HMSExternalTable.java | 1 + .../java/org/apache/doris/qe/SessionVariable.java | 2 ++ .../apache/doris/statistics/HiveAnalysisTask.java | 21 +++++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index 54a3c010dc..79df8bd6bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -292,6 +292,7 @@ public class HMSExternalTable extends ExternalTable { @Override public List<Column> initSchema() { + makeSureInitialized(); List<Column> columns; List<FieldSchema> schema = ((HMSExternalCatalog) catalog).getClient().getSchema(dbName, name); if (dlaType.equals(DLAType.ICEBERG)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 53b9e5f534..428eca11ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -27,6 +27,7 @@ import org.apache.doris.thrift.TResourceLimit; import com.google.common.base.Joiner; import com.google.common.base.Strings; +import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.json.simple.JSONObject; @@ -38,6 +39,7 @@ import java.io.IOException; import java.io.Serializable; import java.lang.reflect.Field; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Random; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java index 4ad00bdc75..381f40f8ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.catalog.Env; import org.apache.doris.common.FeConstants; import org.apache.doris.datasource.HMSExternalCatalog; import org.apache.doris.qe.AutoCloseConnectContext; @@ -50,6 +51,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask { public static final String NUM_ROWS = "numRows"; public static final String NUM_FILES = "numFiles"; public static final String TIMESTAMP = "transient_lastDdlTime"; + public static final String DELIMITER = "-"; public HiveAnalysisTask(AnalysisTaskScheduler analysisTaskScheduler, AnalysisTaskInfo info) { super(analysisTaskScheduler, info); @@ -81,7 +83,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask { Map<String, String> parameters = table.getRemoteTable().getParameters(); // Collect table level row count, null number and timestamp. setParameterData(parameters, params); - params.put("id", String.valueOf(tbl.getId()) + "-" + String.valueOf(col.getName())); + params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), null)); List<ColumnStatisticsObj> tableStats = table.getHiveTableColumnStats(columns); // Collect table level ndv, nulls, min and max. tableStats contains at most 1 item; for (ColumnStatisticsObj tableStat : tableStats) { @@ -113,7 +115,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask { parameters = partition.getParameters(); // Collect row count, null number and timestamp. setParameterData(parameters, params); - params.put("id", String.valueOf(tbl.getId()) + "-" + String.valueOf(col.getName()) + "-" + partName); + params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), partName)); params.put("partId", partName); List<ColumnStatisticsObj> value = entry.getValue(); Preconditions.checkState(value.size() == 1); @@ -134,6 +136,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask { this.stmtExecutor.execute(); } } + Env.getCurrentEnv().getStatisticsCache().refreshSync(tbl.getId(), -1, col.getName()); } private void getStatData(ColumnStatisticsData data, Map<String, String> params) { @@ -201,4 +204,18 @@ public class HiveAnalysisTask extends HMSAnalysisTask { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); params.put("update_time", sdf.format(new Date(timestamp * 1000))); } + + private String genColumnStatId(long tableId, long indexId, String columnName, String partitionName) { + StringBuilder sb = new StringBuilder(); + sb.append(tableId); + sb.append(DELIMITER); + sb.append(indexId); + sb.append(DELIMITER); + sb.append(columnName); + if (partitionName != null) { + sb.append(DELIMITER); + sb.append(partitionName); + } + return sb.toString(); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
