This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 98be895a4c655dc5e50825e701e35cd9241b49f1
Author: minghong <[email protected]>
AuthorDate: Tue Jun 6 19:07:33 2023 +0800

    [fix](stats) skip forbid_unknown_col_stats check for invisible column and 
internal db (#20362)
    
    1. skip forbidUnknownColStats check for invisible columns
    2. use ColumnStatistic.isUnKnown to tell whether the stats are unknown
    3. skip unknown stats check for internal schema
---
 .../apache/doris/analysis/ShowColumnStatsStmt.java |  2 +-
 .../doris/catalog/external/HMSExternalTable.java   |  2 +-
 .../doris/nereids/stats/ExpressionEstimation.java  |  4 +--
 .../doris/nereids/stats/FilterEstimation.java      |  2 +-
 .../doris/nereids/stats/StatsCalculator.java       | 36 +++++++++++++++++++---
 .../doris/statistics/StatisticConstants.java       | 14 +++++++++
 .../apache/doris/statistics/StatisticsCache.java   |  2 +-
 .../doris/nereids/stats/FilterEstimationTest.java  |  6 ++--
 .../org/apache/doris/statistics/CacheTest.java     |  6 ++--
 9 files changed, 57 insertions(+), 17 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
index fd67316df4..8e783c1961 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
@@ -136,7 +136,7 @@ public class ShowColumnStatsStmt extends ShowStmt {
     public ShowResultSet constructResultSet(List<Pair<String, 
ColumnStatistic>> columnStatistics) {
         List<List<String>> result = Lists.newArrayList();
         columnStatistics.forEach(p -> {
-            if (p.second == ColumnStatistic.UNKNOWN) {
+            if (p.second.isUnKnown) {
                 return;
             }
             List<String> row = Lists.newArrayList();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index af032d4ae2..7a8fdaf6fb 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -386,7 +386,7 @@ public class HMSExternalTable extends ExternalTable {
         ColumnStatistic cache = Config.enable_stats
                 ? 
Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(id, "")
                 : ColumnStatistic.UNKNOWN;
-        if (cache == ColumnStatistic.UNKNOWN) {
+        if (cache.isUnKnown) {
             return 1;
         } else {
             return (long) cache.count;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
index d5449987cf..eca7511684 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
@@ -252,7 +252,7 @@ public class ExpressionEstimation extends 
ExpressionVisitor<ColumnStatistic, Sta
     public ColumnStatistic visitMin(Min min, Statistics context) {
         Expression child = min.child();
         ColumnStatistic columnStat = child.accept(this, context);
-        if (columnStat == ColumnStatistic.UNKNOWN) {
+        if (columnStat.isUnKnown) {
             return ColumnStatistic.UNKNOWN;
         }
         /*
@@ -270,7 +270,7 @@ public class ExpressionEstimation extends 
ExpressionVisitor<ColumnStatistic, Sta
     public ColumnStatistic visitMax(Max max, Statistics context) {
         Expression child = max.child();
         ColumnStatistic columnStat = child.accept(this, context);
-        if (columnStat == ColumnStatistic.UNKNOWN) {
+        if (columnStat.isUnKnown) {
             return ColumnStatistic.UNKNOWN;
         }
         /*
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index d7ae653798..acf072fb82 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -197,7 +197,7 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
 
     private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
             ColumnStatistic statsForLeft, ColumnStatistic statsForRight, 
EstimationContext context) {
-        if (statsForLeft == ColumnStatistic.UNKNOWN) {
+        if (statsForLeft.isUnKnown) {
             return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
         }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index c166bd8d0b..77499198f2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -18,6 +18,8 @@
 package org.apache.doris.nereids.stats;
 
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.SchemaTable;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.Pair;
@@ -109,6 +111,7 @@ import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.Histogram;
+import org.apache.doris.statistics.StatisticConstants;
 import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
@@ -558,12 +561,21 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                         
.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
                         .build();
             }
-            if (cache == ColumnStatistic.UNKNOWN && 
!colName.equals("__DORIS_DELETE_SIGN__")) {
-                if (forbidUnknownColStats) {
+            if (cache.isUnKnown) {
+                if (forbidUnknownColStats && 
!ignoreUnknownColStatsCheck(table, slotReference)) {
                     if (StatisticsUtil.statsTblAvailable()) {
-                        throw new AnalysisException("column stats for " + 
colName
-                                + " is unknown,"
-                                + " `set forbid_unknown_col_stats = false` to 
execute sql with unknown stats");
+                        throw new AnalysisException(String.format("Found 
unknown stats for column:%s.%s.\n"
+                                + "It may caused by:\n"
+                                + "\n"
+                                + "1. This column never got analyzed\n"
+                                + "2. This table is empty\n"
+                                + "3. Stats load failed caused by unstable of 
backends,"
+                                + "and FE cached the unknown stats by default 
in this scenario\n"
+                                + "4. There is a bug, please report it to 
Doris community\n"
+                                + "\n"
+                                + "If an unknown stats for this column is 
tolerable,"
+                                + "you could set session variable 
`forbid_unknown_col_stats` to false to make planner"
+                                + " ignore this error and keep planning.", 
table.getName(), colName));
                     } else {
                         throw new AnalysisException("BE is not available!");
                     }
@@ -967,4 +979,18 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
             PhysicalCTEAnchor<? extends Plan, ? extends Plan> cteAnchor, Void 
context) {
         return groupExpression.childStatistics(1);
     }
+
+    /** Returns true when the forbid_unknown_col_stats check must be skipped for this slot. */
+    private boolean ignoreUnknownColStatsCheck(TableIf tableIf, SlotReference slot) {
+        // Invisible columns are exempt; test them before the OlapTable branch, which always returns.
+        boolean invisible = slot.getColumn().isPresent() && !slot.getColumn().get().isVisible();
+        if (invisible || tableIf instanceof SchemaTable) {
+            return true;
+        }
+        if (tableIf instanceof OlapTable) {
+            // Olap tables are exempt only when their database is on the statistics black list.
+            return StatisticConstants.STATISTICS_DB_BLACK_LIST.contains(((OlapTable) tableIf).getQualifiedDbName());
+        }
+        return false;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index 93072c750f..89167d64f2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -17,6 +17,12 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.cluster.ClusterNamespace;
+import org.apache.doris.common.FeConstants;
+import org.apache.doris.system.SystemInfoService;
+
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.TimeUnit;
 
 public class StatisticConstants {
@@ -74,4 +80,12 @@ public class StatisticConstants {
 
     public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60;
 
+    // Cluster-qualified names of the databases whose Olap tables skip the unknown-column-stats check.
+    public static List<String> STATISTICS_DB_BLACK_LIST = new ArrayList<>();
+
+    static {
+        String clusterPrefix = SystemInfoService.DEFAULT_CLUSTER + ClusterNamespace.CLUSTER_DELIMITER;
+        STATISTICS_DB_BLACK_LIST.add(clusterPrefix + FeConstants.INTERNAL_DB_NAME);
+        STATISTICS_DB_BLACK_LIST.add(clusterPrefix + "information_schema");
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
index f92ec87841..1cf6e876ef 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
@@ -204,7 +204,7 @@ public class StatisticsCache {
                         return Optional.of(c);
                     }
                 };
-                if (c == ColumnStatistic.UNKNOWN) {
+                if (c.isUnKnown) {
                     continue;
                 }
                 columnStatisticsCache.put(k, f);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 9452eb6ff8..1fe5e5b0a0 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -74,11 +74,11 @@ class FilterEstimationTest {
         FilterEstimation filterEstimation = new FilterEstimation();
         Statistics expected = filterEstimation.estimate(or, stat);
         Assertions.assertTrue(
-                Precision.equals(expected.getRowCount(), 550,
+                Precision.equals(expected.getRowCount(), 750,
                          0.01));
     }
 
-    // a > 500 or b < 100
+    // a > 500 and b < 100
     // b isNaN
     @Test
     public void testAndNaN() {
@@ -103,7 +103,7 @@ class FilterEstimationTest {
         FilterEstimation filterEstimation = new FilterEstimation();
         Statistics expected = filterEstimation.estimate(and, stat);
         Assertions.assertTrue(
-                Precision.equals(expected.getRowCount(), 50,
+                Precision.equals(expected.getRowCount(), 250,
                         0.01));
     }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
index 5a92bbdfbe..991167e973 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
@@ -62,10 +62,10 @@ public class CacheTest extends TestWithFeService {
         };
         StatisticsCache statisticsCache = new StatisticsCache();
         ColumnStatistic c = statisticsCache.getColumnStatistics(1, "col");
-        Assertions.assertEquals(c, ColumnStatistic.UNKNOWN);
+        Assertions.assertTrue(c.isUnKnown);
         Thread.sleep(100);
         c = statisticsCache.getColumnStatistics(1, "col");
-        Assertions.assertEquals(c, ColumnStatistic.UNKNOWN);
+        Assertions.assertTrue(c.isUnKnown);
     }
 
     @Test
@@ -126,7 +126,7 @@ public class CacheTest extends TestWithFeService {
         };
         StatisticsCache statisticsCache = new StatisticsCache();
         ColumnStatistic columnStatistic = 
statisticsCache.getColumnStatistics(0, "col");
-        Assertions.assertEquals(ColumnStatistic.UNKNOWN, columnStatistic);
+        Assertions.assertTrue(columnStatistic.isUnKnown);
         Thread.sleep(1000);
         columnStatistic = statisticsCache.getColumnStatistics(0, "col");
         Assertions.assertEquals(1, columnStatistic.count);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to