This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 388a15876dd [fix](stats) truncate min/max if too long (#27955) (#29196)
388a15876dd is described below
commit 388a15876dda9f6e6647c319a522c8e17d5f156f
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Dec 28 17:13:12 2023 +0800
[fix](stats) truncate min/max if too long (#27955) (#29196)
---
.../apache/doris/statistics/AnalysisManager.java | 4 ++-
.../apache/doris/statistics/BaseAnalysisTask.java | 16 ++++-----
.../apache/doris/statistics/JdbcAnalysisTask.java | 4 +--
.../apache/doris/statistics/OlapAnalysisTask.java | 4 +--
.../doris/statistics/OlapAnalysisTaskTest.java | 39 ++++++++++++++++++++--
5 files changed, 51 insertions(+), 16 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index f5c14f59fa5..0bf24e0c288 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -542,7 +542,9 @@ public class AnalysisManager implements Writable {
AnalysisInfoBuilder colTaskInfoBuilder = new
AnalysisInfoBuilder(jobInfo);
if (jobInfo.analysisType != AnalysisType.HISTOGRAM) {
colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS);
-
colTaskInfoBuilder.setColToPartitions(Collections.singletonMap(colName,
entry.getValue()));
+ Map<String, Set<String>> colToParts = new HashMap<>();
+ colToParts.put(colName, entry.getValue());
+ colTaskInfoBuilder.setColToPartitions(colToParts);
}
AnalysisInfo analysisInfo =
colTaskInfoBuilder.setColName(colName).setIndexId(indexId)
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index cdd5c3fc69f..27101a1d66f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -58,8 +58,8 @@ public abstract class BaseAnalysisTask {
+ " COUNT(1) AS `row_count`, "
+ " NDV(`${colName}`) AS `ndv`, "
+ " COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
- + " CAST(MIN(`${colName}`) AS STRING) AS `min`, "
- + " CAST(MAX(`${colName}`) AS STRING) AS `max`, "
+ + " SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024)
AS `min`, "
+ + " SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024)
AS `max`, "
+ " ${dataSizeFunction} AS `data_size`, "
+ " NOW() AS `update_time` "
+ " FROM `${catalogName}`.`${dbName}`.`${tblName}`";
@@ -75,8 +75,8 @@ public abstract class BaseAnalysisTask {
+ "${rowCount} AS `row_count`, "
+ "${ndvFunction} as `ndv`, "
+ "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) *
${scaleFactor}) AS `null_count`, "
- + "${min} AS `min`, "
- + "${max} AS `max`, "
+ + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleHints}
${limit}";
@@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask {
+ "${rowCount} AS `row_count`, "
+ "${ndvFunction} as `ndv`, "
+ "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0)
* ${scaleFactor} as `null_count`, "
- + "${min} AS `min`, "
- + "${max} AS `max`, "
+ + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() "
+ "FROM ( "
@@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask {
+ "${row_count} AS `row_count`, "
+ "${ndv} AS `ndv`, "
+ "${null_count} AS `null_count`, "
- + "${min} AS `min`, "
- + "${max} AS `max`, "
+ + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${data_size} AS `data_size`, "
+ "NOW() ";
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
index 50c437fa8f9..2bf72843a71 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
@@ -41,8 +41,8 @@ public class JdbcAnalysisTask extends BaseAnalysisTask {
+ "COUNT(1) AS row_count, "
+ "NDV(`${colName}`) AS ndv, "
+ "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS
null_count, "
- + "MIN(`${colName}`) AS min, "
- + "MAX(`${colName}`) AS max, "
+ + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS min, "
+ + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS max, "
+ "${dataSizeFunction} AS data_size, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 396e25b5a16..50042e4610f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -47,8 +47,8 @@ import java.util.stream.Collectors;
public class OlapAnalysisTask extends BaseAnalysisTask {
private static final String BASIC_STATS_TEMPLATE = "SELECT "
- + "MIN(`${colName}`) as min, "
- + "MAX(`${colName}`) as max "
+ + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) as min, "
+ + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) as max "
+ "FROM `${dbName}`.`${tblName}`";
@VisibleForTesting
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index ed9122f70b6..5b27c79c863 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -150,7 +150,20 @@ public class OlapAnalysisTaskTest {
@Mock
public void runQuery(String sql) {
- Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) -
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) *
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL,
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
+ Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') "
+ + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`,
30001 AS `tbl_id`, "
+ + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS
`part_id`, 500 AS"
+ + " `row_count`, SUM(`t1`.`count`) * COUNT(1) /
(SUM(`t1`.`count`)"
+ + " - SUM(IF(`t1`.`count` = 1, 1, 0)) +
SUM(IF(`t1`.`count` = 1, 1, 0))"
+ + " * SUM(`t1`.`count`) / 500) as `ndv`,
IFNULL(SUM(IF(`t1`.`column_key`"
+ + " IS NULL, `t1`.`count`, 0)), 0) * 5.0 as
`null_count`, "
+ + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`,"
+ + " SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ + "SUM(LENGTH(`column_key`) * count) * 5.0 AS
`data_size`, NOW() "
+ + "FROM ( SELECT t0.`${colName}` as `column_key`,
COUNT(1) "
+ + "as `count` FROM (SELECT `${colName}` FROM "
+ + "`catalogName`.`${dbName}`.`${tblName}` "
+ + " limit 100) as `t0` GROUP BY `t0`.`${colName}`
) as `t1` ", sql);
return;
}
};
@@ -216,7 +229,16 @@ public class OlapAnalysisTaskTest {
@Mock
public void runQuery(String sql) {
- Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-',
'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1
AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`,
ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS
NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`,
SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM
`catalogName`.`${dbName}`.`${tblName}` limit 100", sql);
+ Assertions.assertEquals(" "
+ + "SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`,
"
+ + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS
`tbl_id`, "
+ + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS
`part_id`, "
+ + "500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0)
as `ndv`, "
+ + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1
ELSE 0 END) * 5.0) "
+ + "AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1,
1024) AS `min`, "
+ + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ + "SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`,
NOW() "
+ + "FROM `catalogName`.`${dbName}`.`${tblName}` limit
100", sql);
return;
}
};
@@ -289,7 +311,18 @@ public class OlapAnalysisTaskTest {
@Mock
public void runQuery(String sql) {
- Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) -
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) *
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL,
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
+ System.out.println(sql);
+ Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') "
+ + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`,
30001 AS `tbl_id`, "
+ + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS
`part_id`,"
+ + " 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) /
(SUM(`t1`.`count`) "
+ + "- SUM(IF(`t1`.`count` = 1, 1, 0)) +
SUM(IF(`t1`.`count` = 1, 1, 0)) * "
+ + "SUM(`t1`.`count`) / 500) as `ndv`,
IFNULL(SUM(IF(`t1`.`column_key` "
+ + "IS NULL, `t1`.`count`, 0)), 0) * 5.0 as
`null_count`, "
+ + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ + "SUM(LENGTH(`column_key`) * count) * 5.0 AS
`data_size`, NOW() "
+ + "FROM ( SELECT t0.`${colName}` as `column_key`,
COUNT(1) as `count` FROM (SELECT `${colName}` FROM
`catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY
`t0`.`${colName}` ) as `t1` ", sql);
return;
}
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]