This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 1d3c2e42da1 [fix](stats)min and max return NaN when table is empty 
(#27863)
1d3c2e42da1 is described below

commit 1d3c2e42da126db1264bbb576529f8684daca251
Author: Jibing-Li <[email protected]>
AuthorDate: Fri Dec 1 15:42:36 2023 +0800

    [fix](stats)min and max return NaN when table is empty (#27863)
    
    fix analyze empty table and min/max null value bug:
    1. Skip empty analyze task for sample analyze task. (Full analyze task 
already skipped).
    2. Check sample rows is not 0 before calculate the scale factor.
    3. Remove ' in sql template after remove base64 encoding for min/max value.
    
    backport https://github.com/apache/doris/pull/27862
---
 .../apache/doris/statistics/BaseAnalysisTask.java  | 10 +++----
 .../apache/doris/statistics/HMSAnalysisTask.java   | 11 +++++---
 .../apache/doris/statistics/JdbcAnalysisTask.java  |  2 +-
 .../apache/doris/statistics/OlapAnalysisTask.java  | 32 ++++++++++++----------
 .../org/apache/doris/statistics/AnalyzeTest.java   |  2 +-
 .../doris/statistics/HMSAnalysisTaskTest.java      | 22 +++++++++++++++
 .../doris/statistics/OlapAnalysisTaskTest.java     |  9 ++----
 7 files changed, 56 insertions(+), 32 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 4c0e07ce6cb..449a6fc15e5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask {
             + "${rowCount} AS `row_count`, "
             + "${ndvFunction} as `ndv`, "
             + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) 
* ${scaleFactor} as `null_count`, "
-            + "'${min}' AS `min`, "
-            + "'${max}' AS `max`, "
+            + "${min} AS `min`, "
+            + "${max} AS `max`, "
             + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
             + "NOW() "
             + "FROM ( "
@@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask {
             + "${row_count} AS `row_count`, "
             + "${ndv} AS `ndv`, "
             + "${null_count} AS `null_count`, "
-            + "'${min}' AS `min`, "
-            + "'${max}' AS `max`, "
+            + "${min} AS `min`, "
+            + "${max} AS `max`, "
             + "${data_size} AS `data_size`, "
             + "NOW() ";
 
@@ -311,7 +311,7 @@ public abstract class BaseAnalysisTask {
         this.job = job;
     }
 
-    protected void runQuery(String sql, boolean needEncode) {
+    protected void runQuery(String sql) {
         long startTime = System.currentTimeMillis();
         try (AutoCloseConnectContext a  = 
StatisticsUtil.buildConnectContext()) {
             stmtExecutor = new StmtExecutor(a.connectContext, sql);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 7bd540de2c7..efd99d1eca9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -159,7 +159,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
         }
         stringSubstitutor = new StringSubstitutor(params);
         String sql = stringSubstitutor.replace(sb.toString());
-        runQuery(sql, true);
+        runQuery(sql);
     }
 
     // Collect the partition column stats through HMS metadata.
@@ -201,12 +201,12 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
         params.put("row_count", String.valueOf(count));
         params.put("ndv", String.valueOf(ndv));
         params.put("null_count", String.valueOf(numNulls));
-        params.put("min", min);
-        params.put("max", max);
+        params.put("min", StatisticsUtil.quote(min));
+        params.put("max", StatisticsUtil.quote(max));
         params.put("data_size", String.valueOf(dataSize));
         StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
         String sql = 
stringSubstitutor.replace(ANALYZE_PARTITION_COLUMN_TEMPLATE);
-        runQuery(sql, true);
+        runQuery(sql);
     }
 
     private String updateMinValue(String currentMin, String value) {
@@ -313,6 +313,9 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
         for (long size : chunkSizes) {
             total += size;
         }
+        if (total == 0) {
+            return Pair.of(1.0, 0L);
+        }
         // Calculate the sample target size for percent and rows sample.
         if (tableSample.isPercent()) {
             target = total * tableSample.getSampleValue() / 100;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
index e2e83aa8fa6..50c437fa8f9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java
@@ -110,7 +110,7 @@ public class JdbcAnalysisTask extends BaseAnalysisTask {
         params.put("dataSizeFunction", getDataSizeFunction(col, false));
         StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
         String sql = stringSubstitutor.replace(sb.toString());
-        runQuery(sql, true);
+        runQuery(sql);
     }
 
     private Map<String, String> buildTableStatsParams(String partId) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 45ccdbb0d68..bf144b1a116 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -59,7 +59,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
     }
 
     public void doExecute() throws Exception {
-
+        Set<String> partitionNames = info.colToPartitions.get(info.colName);
+        if (partitionNames.isEmpty()) {
+            LOG.debug("Skip empty empty partition task for column {} in 
{}.{}.{}",
+                    info.catalogId, info.dbId, info.tblId, info.colName);
+            job.appendBuf(this, Collections.emptyList());
+            return;
+        }
         if (tableSample != null) {
             doSample();
         } else {
@@ -113,24 +119,25 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
             params.put("scaleFactor", String.valueOf(scaleFactor));
             params.put("sampleHints", tabletStr.isEmpty() ? "" : 
String.format("TABLET(%s)", tabletStr));
             params.put("ndvFunction", 
getNdvFunction(String.valueOf(rowCount)));
-            params.put("min", min);
-            params.put("max", max);
+            params.put("min", StatisticsUtil.quote(min));
+            params.put("max", StatisticsUtil.quote(max));
             params.put("rowCount", String.valueOf(rowCount));
             params.put("type", col.getType().toString());
             params.put("limit", "");
             if (needLimit()) {
                 // If the tablets to be sampled are too large, use limit to 
control the rows to read, and re-calculate
                 // the scaleFactor.
-                limitFlag = true;
                 rowsToSample = Math.min(getSampleRows(), pair.second);
-                params.put("limit", "limit " + rowsToSample);
-                params.put("scaleFactor", String.valueOf(scaleFactor * 
(double) pair.second / rowsToSample));
+                // Empty table doesn't need to limit.
+                if (rowsToSample > 0) {
+                    limitFlag = true;
+                    params.put("limit", "limit " + rowsToSample);
+                    params.put("scaleFactor", String.valueOf(scaleFactor * 
(double) pair.second / rowsToSample));
+                }
             }
             StringSubstitutor stringSubstitutor = new 
StringSubstitutor(params);
             String sql;
             if (useLinearAnalyzeTemplate()) {
-                params.put("min", StatisticsUtil.quote(min));
-                params.put("max", StatisticsUtil.quote(max));
                 // For single unique key, use count as ndv.
                 if (isSingleUniqueKey()) {
                     params.put("ndvFunction", String.valueOf(rowCount));
@@ -148,7 +155,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
                     col.getName(), params.get("rowCount"), rowsToSample, 
params.get("scaleFactor"),
                     limitFlag, tbl.isDistributionColumn(col.getName()),
                     tbl.isPartitionColumn(col.getName()), col.isKey(), 
isSingleUniqueKey());
-            runQuery(sql, false);
+            runQuery(sql);
         }
     }
 
@@ -169,11 +176,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
      */
     protected void doFull() throws Exception {
         LOG.debug("Will do full collection for column {}", col.getName());
-        Set<String> partitionNames = info.colToPartitions.get(info.colName);
-        if (partitionNames.isEmpty()) {
-            job.appendBuf(this, Collections.emptyList());
-            return;
-        }
         Map<String, String> params = new HashMap<>();
         params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
         params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
@@ -189,7 +191,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
         params.put("tblName", String.valueOf(tbl.getName()));
         StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
         String collectColStats = 
stringSubstitutor.replace(COLLECT_COL_STATISTICS);
-        runQuery(collectColStats, true);
+        runQuery(collectColStats);
     }
 
     // Get sample tablets id and scale up scaleFactor
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java
index 1487d192466..9cebb3283d5 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java
@@ -157,7 +157,7 @@ public class AnalyzeTest extends TestWithFeService {
         new MockUp<BaseAnalysisTask>() {
 
             @Mock
-            protected void runQuery(String sql, boolean needEncode) {}
+            protected void runQuery(String sql) {}
         };
         HashMap<String, Set<String>> colToPartitions = Maps.newHashMap();
         colToPartitions.put("col1", Collections.singleton("t1"));
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index 24a74053bb6..a569a5cb06d 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSample;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.external.HMSExternalTable;
+import org.apache.doris.common.Pair;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.collect.Lists;
@@ -138,4 +139,25 @@ public class HMSAnalysisTaskTest {
         Assertions.assertEquals(1000, tableSample.getSampleValue());
     }
 
+    @Test
+    public void testGetSampleInfo(@Mocked HMSExternalTable tableIf)
+            throws Exception {
+        new MockUp<HMSExternalTable>() {
+            @Mock
+            public List<Long> getChunkSizes() {
+                return Lists.newArrayList();
+            }
+        };
+        HMSAnalysisTask task = new HMSAnalysisTask();
+        task.setTable(tableIf);
+        task.tableSample = null;
+        Pair<Double, Long> info1 = task.getSampleInfo();
+        Assertions.assertEquals(1.0, info1.first);
+        Assertions.assertEquals(0, info1.second);
+        task.tableSample = new TableSample(false, 100L);
+        Pair<Double, Long> info2 = task.getSampleInfo();
+        Assertions.assertEquals(1.0, info2.first);
+        Assertions.assertEquals(0, info2.second);
+    }
+
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 7b7894e54b4..ed9122f70b6 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -149,8 +149,7 @@ public class OlapAnalysisTaskTest {
             }
 
             @Mock
-            public void runQuery(String sql, boolean needEncode) {
-                Assertions.assertFalse(needEncode);
+            public void runQuery(String sql) {
                 Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
                 return;
             }
@@ -216,8 +215,7 @@ public class OlapAnalysisTaskTest {
             }
 
             @Mock
-            public void runQuery(String sql, boolean needEncode) {
-                Assertions.assertFalse(needEncode);
+            public void runQuery(String sql) {
                 Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 
'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 
AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, 
ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS 
NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`, 
SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM 
`catalogName`.`${dbName}`.`${tblName}`  limit 100", sql);
                 return;
             }
@@ -290,8 +288,7 @@ public class OlapAnalysisTaskTest {
             }
 
             @Mock
-            public void runQuery(String sql, boolean needEncode) {
-                Assertions.assertFalse(needEncode);
+            public void runQuery(String sql) {
                 Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
                 return;
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to