This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 388a15876dd [fix](stats) truncate min/max if too long (#27955) (#29196)
388a15876dd is described below

commit 388a15876dda9f6e6647c319a522c8e17d5f156f
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Dec 28 17:13:12 2023 +0800

    [fix](stats) truncate min/max if too long (#27955) (#29196)
---
 .../apache/doris/statistics/AnalysisManager.java   |  4 ++-
 .../apache/doris/statistics/BaseAnalysisTask.java  | 16 ++++-----
 .../apache/doris/statistics/JdbcAnalysisTask.java  |  4 +--
 .../apache/doris/statistics/OlapAnalysisTask.java  |  4 +--
 .../doris/statistics/OlapAnalysisTaskTest.java     | 39 ++++++++++++++++++++--
 5 files changed, 51 insertions(+), 16 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index f5c14f59fa5..0bf24e0c288 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -542,7 +542,9 @@ public class AnalysisManager implements Writable {
             AnalysisInfoBuilder colTaskInfoBuilder = new 
AnalysisInfoBuilder(jobInfo);
             if (jobInfo.analysisType != AnalysisType.HISTOGRAM) {
                 colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS);
-                
colTaskInfoBuilder.setColToPartitions(Collections.singletonMap(colName, 
entry.getValue()));
+                Map<String, Set<String>> colToParts = new HashMap<>();
+                colToParts.put(colName, entry.getValue());
+                colTaskInfoBuilder.setColToPartitions(colToParts);
             }
             AnalysisInfo analysisInfo = 
colTaskInfoBuilder.setColName(colName).setIndexId(indexId)
                     
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index cdd5c3fc69f..27101a1d66f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -58,8 +58,8 @@ public abstract class BaseAnalysisTask {
             + "         COUNT(1) AS `row_count`, "
             + "         NDV(`${colName}`) AS `ndv`, "
             + "         COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
-            + "         CAST(MIN(`${colName}`) AS STRING) AS `min`, "
-            + "         CAST(MAX(`${colName}`) AS STRING) AS `max`, "
+            + "         SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) 
AS `min`, "
+            + "         SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) 
AS `max`, "
             + "         ${dataSizeFunction} AS `data_size`, "
             + "         NOW() AS `update_time` "
             + " FROM `${catalogName}`.`${dbName}`.`${tblName}`";
@@ -75,8 +75,8 @@ public abstract class BaseAnalysisTask {
             + "${rowCount} AS `row_count`, "
             + "${ndvFunction} as `ndv`, "
             + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * 
${scaleFactor}) AS `null_count`, "
-            + "${min} AS `min`, "
-            + "${max} AS `max`, "
+            + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+            + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
             + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
             + "NOW() "
             + "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleHints} 
${limit}";
@@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask {
             + "${rowCount} AS `row_count`, "
             + "${ndvFunction} as `ndv`, "
             + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) 
* ${scaleFactor} as `null_count`, "
-            + "${min} AS `min`, "
-            + "${max} AS `max`, "
+            + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+            + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
             + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
             + "NOW() "
             + "FROM ( "
@@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask {
             + "${row_count} AS `row_count`, "
             + "${ndv} AS `ndv`, "
             + "${null_count} AS `null_count`, "
-            + "${min} AS `min`, "
-            + "${max} AS `max`, "
+            + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+            + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
             + "${data_size} AS `data_size`, "
             + "NOW() ";
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
index 50c437fa8f9..2bf72843a71 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
@@ -41,8 +41,8 @@ public class JdbcAnalysisTask extends BaseAnalysisTask {
             + "COUNT(1) AS row_count, "
             + "NDV(`${colName}`) AS ndv, "
             + "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS 
null_count, "
-            + "MIN(`${colName}`) AS min, "
-            + "MAX(`${colName}`) AS max, "
+            + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS min, "
+            + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS max, "
             + "${dataSizeFunction} AS data_size, "
             + "NOW() "
             + "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 396e25b5a16..50042e4610f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -47,8 +47,8 @@ import java.util.stream.Collectors;
 public class OlapAnalysisTask extends BaseAnalysisTask {
 
     private static final String BASIC_STATS_TEMPLATE = "SELECT "
-            + "MIN(`${colName}`) as min, "
-            + "MAX(`${colName}`) as max "
+            + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) as min, "
+            + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) as max "
             + "FROM `${dbName}`.`${tblName}`";
 
     @VisibleForTesting
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index ed9122f70b6..5b27c79c863 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -150,7 +150,20 @@ public class OlapAnalysisTaskTest {
 
             @Mock
             public void runQuery(String sql) {
-                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
+                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') "
+                        + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 
30001 AS `tbl_id`, "
+                        + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS 
`part_id`, 500 AS"
+                        + " `row_count`, SUM(`t1`.`count`) * COUNT(1) / 
(SUM(`t1`.`count`)"
+                        + " - SUM(IF(`t1`.`count` = 1, 1, 0)) + 
SUM(IF(`t1`.`count` = 1, 1, 0))"
+                        + " * SUM(`t1`.`count`) / 500) as `ndv`, 
IFNULL(SUM(IF(`t1`.`column_key`"
+                        + " IS NULL, `t1`.`count`, 0)), 0) * 5.0 as 
`null_count`, "
+                        + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`,"
+                        + " SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+                        + "SUM(LENGTH(`column_key`) * count) * 5.0 AS 
`data_size`, NOW() "
+                        + "FROM (     SELECT t0.`${colName}` as `column_key`, 
COUNT(1) "
+                        + "as `count`     FROM     (SELECT `${colName}` FROM "
+                        + "`catalogName`.`${dbName}`.`${tblName}`     "
+                        + " limit 100) as `t0`     GROUP BY `t0`.`${colName}` 
) as `t1` ", sql);
                 return;
             }
         };
@@ -216,7 +229,16 @@ public class OlapAnalysisTaskTest {
 
             @Mock
             public void runQuery(String sql) {
-                Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 
'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 
AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, 
ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS 
NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`, 
SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM 
`catalogName`.`${dbName}`.`${tblName}`  limit 100", sql);
+                Assertions.assertEquals(" "
+                        + "SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`, 
"
+                        + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, "
+                        + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS 
`part_id`, "
+                        + "500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0) 
as `ndv`, "
+                        + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 
ELSE 0 END) * 5.0) "
+                        + "AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 
1024) AS `min`, "
+                        + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+                        + "SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, 
NOW() "
+                        + "FROM `catalogName`.`${dbName}`.`${tblName}`  limit 
100", sql);
                 return;
             }
         };
@@ -289,7 +311,18 @@ public class OlapAnalysisTaskTest {
 
             @Mock
             public void runQuery(String sql) {
-                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
+                System.out.println(sql);
+                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') "
+                        + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 
30001 AS `tbl_id`, "
+                        + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS 
`part_id`,"
+                        + " 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / 
(SUM(`t1`.`count`) "
+                        + "- SUM(IF(`t1`.`count` = 1, 1, 0)) + 
SUM(IF(`t1`.`count` = 1, 1, 0)) * "
+                        + "SUM(`t1`.`count`) / 500) as `ndv`, 
IFNULL(SUM(IF(`t1`.`column_key` "
+                        + "IS NULL, `t1`.`count`, 0)), 0) * 5.0 as 
`null_count`, "
+                        + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, "
+                        + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+                        + "SUM(LENGTH(`column_key`) * count) * 5.0 AS 
`data_size`, NOW() "
+                        + "FROM (     SELECT t0.`${colName}` as `column_key`, 
COUNT(1) as `count`     FROM     (SELECT `${colName}` FROM 
`catalogName`.`${dbName}`.`${tblName}`      limit 100) as `t0`     GROUP BY 
`t0`.`${colName}` ) as `t1` ", sql);
                 return;
             }
         };


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to