morrySnow commented on code in PR #23507:
URL: https://github.com/apache/doris/pull/23507#discussion_r1306851837
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java:
##########
@@ -127,80 +130,24 @@ public void execSQLs(List<String> partitionAnalysisSQLs,
Map<String, String> par
queryState.getErrorMessage()));
}
}
- if (buf.size() > 1) {
- for (List<ColStatsData> colStatsDataList : buf) {
- StringBuilder batchInsertSQL =
- new StringBuilder("INSERT INTO
__internal_schema.column_statistics VALUES ");
- StringJoiner sj = new StringJoiner(",");
- colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
- batchInsertSQL.append(sj.toString());
- stmtExecutor = new StmtExecutor(r.connectContext,
batchInsertSQL.toString());
- executeWithExceptionOnFail(stmtExecutor);
- }
- params.put("type", col.getType().toString());
- StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
- String sql =
stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
- stmtExecutor = new StmtExecutor(r.connectContext, sql);
- executeWithExceptionOnFail(stmtExecutor);
- } else {
- List<ColStatsData> colStatsDataList = buf.get(0);
- String batchInsertSQLTemplate = "INSERT INTO
__internal_schema.column_statistics "
- + "SELECT id, catalog_id, db_id, tbl_id, idx_id,
col_id, part_id, row_count,"
- + "ndv, null_count, CAST(min AS string), CAST(max AS
string), data_size, update_time FROM ("
- + "SELECT CONCAT(${tblId}, '-', ${idxId}, '-',
'${colId}') AS id, "
- + " ${catalogId} AS catalog_id, "
- + " ${dbId} AS db_id, "
- + " ${tblId} AS tbl_id, "
- + " ${idxId} AS idx_id, "
- + " '${colId}' AS col_id, "
- + " NULL AS part_id, "
- + " SUM(count) AS row_count,"
- + " SUM(null_count) AS null_count, "
- + " MIN(CAST (min AS ${type})) AS min, "
- + " MAX(CAST (max AS ${type})) AS max, "
- + " SUM(data_size_in_bytes) AS data_size, "
- + " NOW() AS update_time"
- + " FROM (${partitionStatsView}) psv) t1, "
- + " (SELECT NDV(`${colName}`) AS ndv "
- + " FROM `${dbName}`.`${tblName}` ${sampleExpr})
t2 UNION ALL ${partitionStatsView}";
- StringJoiner sj = new StringJoiner(" UNION ALL ");
- String selectPartitionTemplate =
- "SELECT %s AS id,"
- + "%s AS catalog_id,"
- + "%s AS db_id,"
- + "%s AS tbl_id,"
- + "%s AS idx_id,"
- + "%s AS col_id,"
- + "%s AS part_id,"
- + "%s AS count,"
- + "%s AS ndv,"
- + "%s AS null_count,"
- + "%s as min,"
- + "%s as max,"
- + "%s as data_size_in_bytes,"
- + "%s AS update_time";
- colStatsDataList.forEach(c ->
sj.add(String.format(selectPartitionTemplate,
- StatisticsUtil.quote(c.statsId.id),
- c.statsId.catalogId,
- c.statsId.dbId,
- c.statsId.tblId,
- c.statsId.idxId,
- StatisticsUtil.quote(c.statsId.colId),
- c.statsId.partId,
- c.count,
- c.ndv,
- c.nullCount,
- c.minLit == null ? null :
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.minLit)),
- c.maxLit == null ? null :
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.maxLit)),
- c.dataSizeInBytes,
- StatisticsUtil.quote(c.updateTime))));
- params.put("partitionStatsView", sj.toString());
- params.put("type", col.getType().toString());
- StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
- String insertSQL =
stringSubstitutor.replace(batchInsertSQLTemplate);
- stmtExecutor = new StmtExecutor(r.connectContext, insertSQL);
+ for (List<ColStatsData> colStatsDataList : buf) {
+ StringBuilder batchInsertSQL =
+ new StringBuilder("INSERT INTO
__internal_schema.column_statistics VALUES ");
+ StringJoiner sj = new StringJoiner(",");
+ colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
+ batchInsertSQL.append(sj.toString());
+ stmtExecutor = new StmtExecutor(r.connectContext,
batchInsertSQL.toString());
executeWithExceptionOnFail(stmtExecutor);
}
+ params.put("type", col.getType().toString());
+ StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
+ String sql =
stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
+ stmtExecutor = new StmtExecutor(r.connectContext, sql);
+ executeWithExceptionOnFail(stmtExecutor);
+ } finally {
+ LOG.debug("ANALYZE TASK {} END COST TIME {}", info,
+ (System.currentTimeMillis() - startTime) + "ms");
Review Comment:
```suggestion
LOG.debug("analyze task {} end. cost {}ms", info,
System.currentTimeMillis() - startTime);
```
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java:
##########
@@ -127,80 +130,24 @@ public void execSQLs(List<String> partitionAnalysisSQLs,
Map<String, String> par
queryState.getErrorMessage()));
}
}
- if (buf.size() > 1) {
- for (List<ColStatsData> colStatsDataList : buf) {
- StringBuilder batchInsertSQL =
- new StringBuilder("INSERT INTO
__internal_schema.column_statistics VALUES ");
- StringJoiner sj = new StringJoiner(",");
- colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
- batchInsertSQL.append(sj.toString());
- stmtExecutor = new StmtExecutor(r.connectContext,
batchInsertSQL.toString());
- executeWithExceptionOnFail(stmtExecutor);
- }
- params.put("type", col.getType().toString());
- StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
- String sql =
stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
- stmtExecutor = new StmtExecutor(r.connectContext, sql);
- executeWithExceptionOnFail(stmtExecutor);
- } else {
- List<ColStatsData> colStatsDataList = buf.get(0);
- String batchInsertSQLTemplate = "INSERT INTO
__internal_schema.column_statistics "
- + "SELECT id, catalog_id, db_id, tbl_id, idx_id,
col_id, part_id, row_count,"
- + "ndv, null_count, CAST(min AS string), CAST(max AS
string), data_size, update_time FROM ("
- + "SELECT CONCAT(${tblId}, '-', ${idxId}, '-',
'${colId}') AS id, "
- + " ${catalogId} AS catalog_id, "
- + " ${dbId} AS db_id, "
- + " ${tblId} AS tbl_id, "
- + " ${idxId} AS idx_id, "
- + " '${colId}' AS col_id, "
- + " NULL AS part_id, "
- + " SUM(count) AS row_count,"
- + " SUM(null_count) AS null_count, "
- + " MIN(CAST (min AS ${type})) AS min, "
- + " MAX(CAST (max AS ${type})) AS max, "
- + " SUM(data_size_in_bytes) AS data_size, "
- + " NOW() AS update_time"
- + " FROM (${partitionStatsView}) psv) t1, "
- + " (SELECT NDV(`${colName}`) AS ndv "
- + " FROM `${dbName}`.`${tblName}` ${sampleExpr})
t2 UNION ALL ${partitionStatsView}";
- StringJoiner sj = new StringJoiner(" UNION ALL ");
- String selectPartitionTemplate =
- "SELECT %s AS id,"
- + "%s AS catalog_id,"
- + "%s AS db_id,"
- + "%s AS tbl_id,"
- + "%s AS idx_id,"
- + "%s AS col_id,"
- + "%s AS part_id,"
- + "%s AS count,"
- + "%s AS ndv,"
- + "%s AS null_count,"
- + "%s as min,"
- + "%s as max,"
- + "%s as data_size_in_bytes,"
- + "%s AS update_time";
- colStatsDataList.forEach(c ->
sj.add(String.format(selectPartitionTemplate,
- StatisticsUtil.quote(c.statsId.id),
- c.statsId.catalogId,
- c.statsId.dbId,
- c.statsId.tblId,
- c.statsId.idxId,
- StatisticsUtil.quote(c.statsId.colId),
- c.statsId.partId,
- c.count,
- c.ndv,
- c.nullCount,
- c.minLit == null ? null :
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.minLit)),
- c.maxLit == null ? null :
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.maxLit)),
- c.dataSizeInBytes,
- StatisticsUtil.quote(c.updateTime))));
- params.put("partitionStatsView", sj.toString());
- params.put("type", col.getType().toString());
- StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
- String insertSQL =
stringSubstitutor.replace(batchInsertSQLTemplate);
- stmtExecutor = new StmtExecutor(r.connectContext, insertSQL);
+ for (List<ColStatsData> colStatsDataList : buf) {
+ StringBuilder batchInsertSQL =
+ new StringBuilder("INSERT INTO
__internal_schema.column_statistics VALUES ");
Review Comment:
`__internal_schema.column_statistics` should use
`FULL_QUALIFIED_COLUMN_STATISTICS_NAME`
##########
fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java:
##########
@@ -2438,6 +2438,7 @@ public StatementBase setParsedStmt(StatementBase
parsedStmt) {
}
public List<ResultRow> executeInternalQuery() {
+ LOG.debug("INTERNAL QUERY: " + originStmt.toString());
Review Comment:
```suggestion
LOG.debug("execute internal query: {}", originStmt);
```
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java:
##########
@@ -70,6 +70,7 @@ protected void runAfterCatalogReady() {
if (!StatisticsUtil.statsTblAvailable()) {
return;
}
+ analyzePeriodically();
Review Comment:
Periodic analysis should run in a separate thread.
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java:
##########
@@ -236,6 +236,7 @@ protected void executeWithExceptionOnFail(StmtExecutor
stmtExecutor) throws Exce
if (killed) {
return;
}
+ LOG.debug("INTERNAL SQL: " + stmtExecutor.getOriginStmt().toString());
Review Comment:
```suggestion
LOG.debug("execute internal sql: {}", stmtExecutor.getOriginStmt());
```
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java:
##########
@@ -109,6 +110,8 @@ public void doExecute() throws Exception {
@VisibleForTesting
public void execSQLs(List<String> partitionAnalysisSQLs, Map<String,
String> params) throws Exception {
+ long startTime = System.currentTimeMillis();
+ LOG.debug("ANALYZE TASK {} START: {}", info.toString(), new Date());
Review Comment:
```suggestion
LOG.debug("analyze task {} start at {}", info.toString(), new
Date());
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]