This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7f1b558011b [fix](stats) truncate min/max if too long (#27955)
7f1b558011b is described below
commit 7f1b558011bfd1bc646d50a5906ae26f4a0a8dd9
Author: AKIRA <[email protected]>
AuthorDate: Tue Dec 5 20:40:38 2023 +0800
[fix](stats) truncate min/max if too long (#27955)
For some string value the max/min might be a very long string
which might take too many memory of FE,
so we truncate to 1024 chars if it's too long
---
.../apache/doris/statistics/AnalysisManager.java | 4 +-
.../apache/doris/statistics/BaseAnalysisTask.java | 16 ++++----
.../apache/doris/statistics/JdbcAnalysisTask.java | 4 +-
.../apache/doris/statistics/OlapAnalysisTask.java | 4 +-
.../doris/statistics/OlapAnalysisTaskTest.java | 39 ++++++++++++++++++--
.../suites/statistics/analyze_stats.groovy | 43 +++++++++++++++++-----
6 files changed, 84 insertions(+), 26 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 4f62c3b875a..3a6344e5547 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -544,7 +544,9 @@ public class AnalysisManager implements Writable {
AnalysisInfoBuilder colTaskInfoBuilder = new
AnalysisInfoBuilder(jobInfo);
if (jobInfo.analysisType != AnalysisType.HISTOGRAM) {
colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS);
-
colTaskInfoBuilder.setColToPartitions(Collections.singletonMap(colName,
entry.getValue()));
+ Map<String, Set<String>> colToParts = new HashMap<>();
+ colToParts.put(colName, entry.getValue());
+ colTaskInfoBuilder.setColToPartitions(colToParts);
}
AnalysisInfo analysisInfo =
colTaskInfoBuilder.setColName(colName).setIndexId(indexId)
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index cdd5c3fc69f..27101a1d66f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -58,8 +58,8 @@ public abstract class BaseAnalysisTask {
+ " COUNT(1) AS `row_count`, "
+ " NDV(`${colName}`) AS `ndv`, "
+ " COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
- + " CAST(MIN(`${colName}`) AS STRING) AS `min`, "
- + " CAST(MAX(`${colName}`) AS STRING) AS `max`, "
+ + " SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024)
AS `min`, "
+ + " SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024)
AS `max`, "
+ " ${dataSizeFunction} AS `data_size`, "
+ " NOW() AS `update_time` "
+ " FROM `${catalogName}`.`${dbName}`.`${tblName}`";
@@ -75,8 +75,8 @@ public abstract class BaseAnalysisTask {
+ "${rowCount} AS `row_count`, "
+ "${ndvFunction} as `ndv`, "
+ "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) *
${scaleFactor}) AS `null_count`, "
- + "${min} AS `min`, "
- + "${max} AS `max`, "
+ + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleHints}
${limit}";
@@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask {
+ "${rowCount} AS `row_count`, "
+ "${ndvFunction} as `ndv`, "
+ "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0)
* ${scaleFactor} as `null_count`, "
- + "${min} AS `min`, "
- + "${max} AS `max`, "
+ + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() "
+ "FROM ( "
@@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask {
+ "${row_count} AS `row_count`, "
+ "${ndv} AS `ndv`, "
+ "${null_count} AS `null_count`, "
- + "${min} AS `min`, "
- + "${max} AS `max`, "
+ + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${data_size} AS `data_size`, "
+ "NOW() ";
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
index 50c437fa8f9..2bf72843a71 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
@@ -41,8 +41,8 @@ public class JdbcAnalysisTask extends BaseAnalysisTask {
+ "COUNT(1) AS row_count, "
+ "NDV(`${colName}`) AS ndv, "
+ "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS
null_count, "
- + "MIN(`${colName}`) AS min, "
- + "MAX(`${colName}`) AS max, "
+ + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS min, "
+ + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS max, "
+ "${dataSizeFunction} AS data_size, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index bf144b1a116..89ed24e7904 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -46,8 +46,8 @@ import java.util.stream.Collectors;
public class OlapAnalysisTask extends BaseAnalysisTask {
private static final String BASIC_STATS_TEMPLATE = "SELECT "
- + "MIN(`${colName}`) as min, "
- + "MAX(`${colName}`) as max "
+ + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) as min, "
+ + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) as max "
+ "FROM `${dbName}`.`${tblName}`";
@VisibleForTesting
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index ed9122f70b6..5b27c79c863 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -150,7 +150,20 @@ public class OlapAnalysisTaskTest {
@Mock
public void runQuery(String sql) {
- Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) -
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) *
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL,
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
+ Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') "
+ + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`,
30001 AS `tbl_id`, "
+ + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS
`part_id`, 500 AS"
+ + " `row_count`, SUM(`t1`.`count`) * COUNT(1) /
(SUM(`t1`.`count`)"
+ + " - SUM(IF(`t1`.`count` = 1, 1, 0)) +
SUM(IF(`t1`.`count` = 1, 1, 0))"
+ + " * SUM(`t1`.`count`) / 500) as `ndv`,
IFNULL(SUM(IF(`t1`.`column_key`"
+ + " IS NULL, `t1`.`count`, 0)), 0) * 5.0 as
`null_count`, "
+ + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`,"
+ + " SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ + "SUM(LENGTH(`column_key`) * count) * 5.0 AS
`data_size`, NOW() "
+ + "FROM ( SELECT t0.`${colName}` as `column_key`,
COUNT(1) "
+ + "as `count` FROM (SELECT `${colName}` FROM "
+ + "`catalogName`.`${dbName}`.`${tblName}` "
+ + " limit 100) as `t0` GROUP BY `t0`.`${colName}`
) as `t1` ", sql);
return;
}
};
@@ -216,7 +229,16 @@ public class OlapAnalysisTaskTest {
@Mock
public void runQuery(String sql) {
- Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-',
'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1
AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`,
ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS
NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`,
SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM
`catalogName`.`${dbName}`.`${tblName}` limit 100", sql);
+ Assertions.assertEquals(" "
+ + "SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`,
"
+ + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS
`tbl_id`, "
+ + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS
`part_id`, "
+ + "500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0)
as `ndv`, "
+ + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1
ELSE 0 END) * 5.0) "
+ + "AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1,
1024) AS `min`, "
+ + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ + "SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`,
NOW() "
+ + "FROM `catalogName`.`${dbName}`.`${tblName}` limit
100", sql);
return;
}
};
@@ -289,7 +311,18 @@ public class OlapAnalysisTaskTest {
@Mock
public void runQuery(String sql) {
- Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) -
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) *
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL,
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
+ System.out.println(sql);
+ Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1',
'-', 'null') "
+ + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`,
30001 AS `tbl_id`, "
+ + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS
`part_id`,"
+ + " 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) /
(SUM(`t1`.`count`) "
+ + "- SUM(IF(`t1`.`count` = 1, 1, 0)) +
SUM(IF(`t1`.`count` = 1, 1, 0)) * "
+ + "SUM(`t1`.`count`) / 500) as `ndv`,
IFNULL(SUM(IF(`t1`.`column_key` "
+ + "IS NULL, `t1`.`count`, 0)), 0) * 5.0 as
`null_count`, "
+ + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ + "SUM(LENGTH(`column_key`) * count) * 5.0 AS
`data_size`, NOW() "
+ + "FROM ( SELECT t0.`${colName}` as `column_key`,
COUNT(1) as `count` FROM (SELECT `${colName}` FROM
`catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY
`t0`.`${colName}` ) as `t1` ", sql);
return;
}
};
diff --git a/regression-test/suites/statistics/analyze_stats.groovy
b/regression-test/suites/statistics/analyze_stats.groovy
index 333772972ec..9a06ed772b5 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -95,7 +95,7 @@ suite("test_analyze") {
sql """
SET enable_nereids_planner=true;
-
+
"""
sql """
@@ -124,7 +124,7 @@ suite("test_analyze") {
Thread.sleep(1000 * 60)
sql """
- SELECT * FROM ${tbl};
+ SELECT * FROM ${tbl};
"""
sql """
@@ -135,7 +135,7 @@ suite("test_analyze") {
try {
sql """
- SELECT * FROM ${tbl};
+ SELECT * FROM ${tbl};
"""
} catch (Exception e) {
exception = e
@@ -150,7 +150,7 @@ suite("test_analyze") {
"""
sql """
- SELECT * FROM ${tbl};
+ SELECT * FROM ${tbl};
"""
sql """
@@ -159,7 +159,7 @@ suite("test_analyze") {
try {
sql """
- SELECT * FROM ${tbl};
+ SELECT * FROM ${tbl};
"""
} catch (Exception e) {
exception = e
@@ -268,7 +268,7 @@ suite("test_analyze") {
"""
sql """
-
+
CREATE TAbLE IF NOT EXISTS test_600_partition_table_analyze (
`id` INT NOT NULL
) ENGINE=OLAP
@@ -882,7 +882,7 @@ PARTITION `p599` VALUES IN (599)
(
"replication_num" = "1"
);
-
+
"""
sql """
@@ -932,7 +932,7 @@ PARTITION `p599` VALUES IN (599)
(
PARTITION `p1` VALUES LESS THAN ('5')
)
-
+
DISTRIBUTED BY HASH(`id`) BUCKETS 3
PROPERTIES (
"replication_num"="1"
@@ -1107,7 +1107,7 @@ PARTITION `p599` VALUES IN (599)
DISTRIBUTED BY HASH(`col1`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
- );
+ );
"""
sql """insert into test_meta_management values(1, 2, 3);"""
@@ -1197,7 +1197,7 @@ PARTITION `p599` VALUES IN (599)
DISTRIBUTED BY HASH(`col1`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
- );
+ );
"""
sql """INSERT INTO test_max_min_lit VALUES("\\'")"""
@@ -1244,6 +1244,7 @@ PARTITION `p599` VALUES IN (599)
assert all_finished(show_result)
+
// Test truncate table will drop table stats too.
sql """ANALYZE TABLE ${tbl} WITH SYNC"""
def result_before_truncate = sql """show column stats ${tbl}"""
@@ -1254,4 +1255,26 @@ PARTITION `p599` VALUES IN (599)
result_after_truncate = sql """show column cached stats ${tbl}"""
assertEquals(0, result_after_truncate.size())
+
+
+ sql """
+ delete from ${tbl} where analyzetestlimitedk3 >= -2147483648
+ """
+ sql """
+ INSERT INTO `${tbl}` VALUES
(-2103297891,1,101,15248,4761818404925265645,939926.283,
+ 'UTmCFKMbprf0zSVOIlBJRNOl3JcNBdOsnCDt','2022-09-28','2022-10-28
01:56:56','tVvGDSrN6kyn',
+
-954349107.187117,-40.46286,'11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
[...]
+ '-1559301292834325905', NULL, NULL, NULL, NULL)
+ """
+
+ sql """
+ ANALYZE TABLE ${tbl} WITH SYNC
+ """
+
+ def truncate_test_result = sql """
+ SHOW COLUMN CACHED STATS ${tbl}(analyzetestlimitedk12)
+ """
+ assert
"111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
[...]
+ assert
"111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
[...]
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]