This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0-beta in repository https://gitbox.apache.org/repos/asf/doris.git
commit 98be895a4c655dc5e50825e701e35cd9241b49f1 Author: minghong <[email protected]> AuthorDate: Tue Jun 6 19:07:33 2023 +0800 [fix](stats) skip forbid_unknown_col_stats check for invisible column and internal db (#20362) 1. skip forbidUnknownColStats check for invisible columns 2. use ColumnStatistic.isUnKnown to tell whether the stats are unknown 3. skip unknown stats check for internal schema --- .../apache/doris/analysis/ShowColumnStatsStmt.java | 2 +- .../doris/catalog/external/HMSExternalTable.java | 2 +- .../doris/nereids/stats/ExpressionEstimation.java | 4 +-- .../doris/nereids/stats/FilterEstimation.java | 2 +- .../doris/nereids/stats/StatsCalculator.java | 36 +++++++++++++++++++--- .../doris/statistics/StatisticConstants.java | 14 +++++++++ .../apache/doris/statistics/StatisticsCache.java | 2 +- .../doris/nereids/stats/FilterEstimationTest.java | 6 ++-- .../org/apache/doris/statistics/CacheTest.java | 6 ++-- 9 files changed, 57 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java index fd67316df4..8e783c1961 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java @@ -136,7 +136,7 @@ public class ShowColumnStatsStmt extends ShowStmt { public ShowResultSet constructResultSet(List<Pair<String, ColumnStatistic>> columnStatistics) { List<List<String>> result = Lists.newArrayList(); columnStatistics.forEach(p -> { - if (p.second == ColumnStatistic.UNKNOWN) { + if (p.second.isUnKnown) { return; } List<String> row = Lists.newArrayList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index af032d4ae2..7a8fdaf6fb 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -386,7 +386,7 @@ public class HMSExternalTable extends ExternalTable { ColumnStatistic cache = Config.enable_stats ? Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(id, "") : ColumnStatistic.UNKNOWN; - if (cache == ColumnStatistic.UNKNOWN) { + if (cache.isUnKnown) { return 1; } else { return (long) cache.count; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index d5449987cf..eca7511684 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -252,7 +252,7 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta public ColumnStatistic visitMin(Min min, Statistics context) { Expression child = min.child(); ColumnStatistic columnStat = child.accept(this, context); - if (columnStat == ColumnStatistic.UNKNOWN) { + if (columnStat.isUnKnown) { return ColumnStatistic.UNKNOWN; } /* @@ -270,7 +270,7 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta public ColumnStatistic visitMax(Max max, Statistics context) { Expression child = max.child(); ColumnStatistic columnStat = child.accept(this, context); - if (columnStat == ColumnStatistic.UNKNOWN) { + if (columnStat.isUnKnown) { return ColumnStatistic.UNKNOWN; } /* diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index d7ae653798..acf072fb82 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -197,7 +197,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo private Statistics calculateWhenLiteralRight(ComparisonPredicate cp, ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context) { - if (statsForLeft == ColumnStatistic.UNKNOWN) { + if (statsForLeft.isUnKnown) { return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index c166bd8d0b..77499198f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -18,6 +18,8 @@ package org.apache.doris.nereids.stats; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.SchemaTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Config; import org.apache.doris.common.Pair; @@ -109,6 +111,7 @@ import org.apache.doris.nereids.types.DataType; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Histogram; +import org.apache.doris.statistics.StatisticConstants; import org.apache.doris.statistics.StatisticRange; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; @@ -558,12 +561,21 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()) .build(); } - if (cache == ColumnStatistic.UNKNOWN && !colName.equals("__DORIS_DELETE_SIGN__")) { - if (forbidUnknownColStats) { + if (cache.isUnKnown) { + if (forbidUnknownColStats && !ignoreUnknownColStatsCheck(table, 
slotReference)) { if (StatisticsUtil.statsTblAvailable()) { - throw new AnalysisException("column stats for " + colName - + " is unknown," - + " `set forbid_unknown_col_stats = false` to execute sql with unknown stats"); + throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n" + + "It may caused by:\n" + + "\n" + + "1. This column never got analyzed\n" + + "2. This table is empty\n" + + "3. Stats load failed caused by unstable of backends," + + "and FE cached the unknown stats by default in this scenario\n" + + "4. There is a bug, please report it to Doris community\n" + + "\n" + + "If an unknown stats for this column is tolerable," + + "you could set session variable `forbid_unknown_col_stats` to false to make planner" + + " ignore this error and keep planning.", table.getName(), colName)); } else { throw new AnalysisException("BE is not available!"); } @@ -967,4 +979,18 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { PhysicalCTEAnchor<? extends Plan, ? 
extends Plan> cteAnchor, Void context) { return groupExpression.childStatistics(1); } + + private boolean ignoreUnknownColStatsCheck(TableIf tableIf, SlotReference slot) { + if (tableIf instanceof SchemaTable) { + return true; + } + if (tableIf instanceof OlapTable) { + OlapTable olapTable = (OlapTable) tableIf; + return StatisticConstants.STATISTICS_DB_BLACK_LIST.contains(olapTable.getQualifiedDbName()); + } + if (slot.getColumn().isPresent() && slot.getColumn().get().isVisible()) { + return true; + } + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 93072c750f..89167d64f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -17,6 +17,12 @@ package org.apache.doris.statistics; +import org.apache.doris.cluster.ClusterNamespace; +import org.apache.doris.common.FeConstants; +import org.apache.doris.system.SystemInfoService; + +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; public class StatisticConstants { @@ -74,4 +80,12 @@ public class StatisticConstants { public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60; + public static List<String> STATISTICS_DB_BLACK_LIST = new ArrayList<>(); + + static { + STATISTICS_DB_BLACK_LIST.add(SystemInfoService.DEFAULT_CLUSTER + + ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME); + STATISTICS_DB_BLACK_LIST.add(SystemInfoService.DEFAULT_CLUSTER + + ClusterNamespace.CLUSTER_DELIMITER + "information_schema"); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index f92ec87841..1cf6e876ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -204,7 +204,7 @@ public class StatisticsCache { return Optional.of(c); } }; - if (c == ColumnStatistic.UNKNOWN) { + if (c.isUnKnown) { continue; } columnStatisticsCache.put(k, f); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java index 9452eb6ff8..1fe5e5b0a0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java @@ -74,11 +74,11 @@ class FilterEstimationTest { FilterEstimation filterEstimation = new FilterEstimation(); Statistics expected = filterEstimation.estimate(or, stat); Assertions.assertTrue( - Precision.equals(expected.getRowCount(), 550, + Precision.equals(expected.getRowCount(), 750, 0.01)); } - // a > 500 or b < 100 + // a > 500 and b < 100 // b isNaN @Test public void testAndNaN() { @@ -103,7 +103,7 @@ class FilterEstimationTest { FilterEstimation filterEstimation = new FilterEstimation(); Statistics expected = filterEstimation.estimate(and, stat); Assertions.assertTrue( - Precision.equals(expected.getRowCount(), 50, + Precision.equals(expected.getRowCount(), 250, 0.01)); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java index 5a92bbdfbe..991167e973 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java @@ -62,10 +62,10 @@ public class CacheTest extends TestWithFeService { }; StatisticsCache statisticsCache = new StatisticsCache(); ColumnStatistic c = statisticsCache.getColumnStatistics(1, "col"); - Assertions.assertEquals(c, ColumnStatistic.UNKNOWN); + Assertions.assertTrue(c.isUnKnown); Thread.sleep(100); c = 
statisticsCache.getColumnStatistics(1, "col"); - Assertions.assertEquals(c, ColumnStatistic.UNKNOWN); + Assertions.assertTrue(c.isUnKnown); } @Test @@ -126,7 +126,7 @@ public class CacheTest extends TestWithFeService { }; StatisticsCache statisticsCache = new StatisticsCache(); ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(0, "col"); - Assertions.assertEquals(ColumnStatistic.UNKNOWN, columnStatistic); + Assertions.assertTrue(columnStatistic.isUnKnown); Thread.sleep(1000); columnStatistic = statisticsCache.getColumnStatistics(0, "col"); Assertions.assertEquals(1, columnStatistic.count); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
