This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 666f7096f2 [Fix](multi catalog)(planner) Fix external table statistic
collection bug (#16486)
666f7096f2 is described below
commit 666f7096f21a882a23a7c73542dc598b0ad9270b
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Feb 8 16:51:30 2023 +0800
[Fix](multi catalog)(planner) Fix external table statistic collection bug
(#16486)
Add the index id to the column statistic id, and refresh the statistics cache after the analysis task finishes.
---
.../doris/catalog/external/HMSExternalTable.java | 1 +
.../apache/doris/statistics/HiveAnalysisTask.java | 21 +++++++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 4d4fc310bd..1a558e1bd9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -294,6 +294,7 @@ public class HMSExternalTable extends ExternalTable {
@Override
public List<Column> initSchema() {
+ makeSureInitialized();
List<Column> columns;
List<FieldSchema> schema = ((HMSExternalCatalog)
catalog).getClient().getSchema(dbName, name);
if (dlaType.equals(DLAType.ICEBERG)) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
index d22e2abe78..c0ca6df80c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
@@ -17,6 +17,7 @@
package org.apache.doris.statistics;
+import org.apache.doris.catalog.Env;
import org.apache.doris.common.FeConstants;
import org.apache.doris.datasource.HMSExternalCatalog;
import org.apache.doris.qe.AutoCloseConnectContext;
@@ -54,6 +55,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
public static final String NUM_ROWS = "numRows";
public static final String NUM_FILES = "numFiles";
public static final String TIMESTAMP = "transient_lastDdlTime";
+ public static final String DELIMITER = "-";
public HiveAnalysisTask(AnalysisTaskScheduler analysisTaskScheduler,
AnalysisTaskInfo info) {
super(analysisTaskScheduler, info);
@@ -85,7 +87,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
Map<String, String> parameters =
table.getRemoteTable().getParameters();
// Collect table level row count, null number and timestamp.
setParameterData(parameters, params);
- params.put("id", String.valueOf(tbl.getId()) + "-" +
String.valueOf(col.getName()));
+ params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(),
null));
List<ColumnStatisticsObj> tableStats =
table.getHiveTableColumnStats(columns);
// Collect table level ndv, nulls, min and max. tableStats contains at
most 1 item;
for (ColumnStatisticsObj tableStat : tableStats) {
@@ -117,7 +119,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
parameters = partition.getParameters();
// Collect row count, null number and timestamp.
setParameterData(parameters, params);
- params.put("id", String.valueOf(tbl.getId()) + "-" +
String.valueOf(col.getName()) + "-" + partName);
+ params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(),
partName));
params.put("partId", partName);
List<ColumnStatisticsObj> value = entry.getValue();
Preconditions.checkState(value.size() == 1);
@@ -138,6 +140,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
this.stmtExecutor.execute();
}
}
+ Env.getCurrentEnv().getStatisticsCache().refreshSync(tbl.getId(), -1,
col.getName());
}
private void getStatData(ColumnStatisticsData data, Map<String, String>
params) {
@@ -226,4 +229,18 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
params.put("update_time", sdf.format(new Date(timestamp * 1000)));
}
+
+ private String genColumnStatId(long tableId, long indexId, String
columnName, String partitionName) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(tableId);
+ sb.append(DELIMITER);
+ sb.append(indexId);
+ sb.append(DELIMITER);
+ sb.append(columnName);
+ if (partitionName != null) {
+ sb.append(DELIMITER);
+ sb.append(partitionName);
+ }
+ return sb.toString();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]