This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 687a91872a2 Revert "[refactor](nereids)forbid unknown stats for 
branch2.0 #24061 (#24243)" (#24294)
687a91872a2 is described below

commit 687a91872a296bd60d38a2866a0ed21138a2e5d8
Author: Kang <[email protected]>
AuthorDate: Wed Sep 13 15:18:48 2023 +0800

    Revert "[refactor](nereids)forbid unknown stats for branch2.0 #24061 
(#24243)" (#24294)
    
    This reverts commit 108e91f2a37d08119e4a611a51fdd0341d9330b3.
---
 .../glue/translator/PhysicalPlanTranslator.java    | 35 ----------------
 .../glue/translator/PlanTranslatorContext.java     | 31 --------------
 .../rules/rewrite/PredicatePropagation.java        | 39 +++++++----------
 .../doris/nereids/stats/StatsCalculator.java       | 49 +++++++++++++++++-----
 .../nereids/rules/rewrite/InferPredicatesTest.java | 30 -------------
 .../infer_predicate/infer_predicate.groovy         | 18 --------
 6 files changed, 54 insertions(+), 148 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 359373ddbf0..406dda54859 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -126,11 +126,6 @@ import 
org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
 import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter;
 import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
-import org.apache.doris.nereids.types.ArrayType;
-import org.apache.doris.nereids.types.DataType;
-import org.apache.doris.nereids.types.JsonType;
-import org.apache.doris.nereids.types.MapType;
-import org.apache.doris.nereids.types.StructType;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.nereids.util.JoinUtils;
 import org.apache.doris.nereids.util.Utils;
@@ -240,14 +235,6 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         Collections.reverse(context.getPlanFragments());
         // TODO: maybe we need to trans nullable directly? and then we could 
remove call computeMemLayout
         context.getDescTable().computeMemLayout();
-        if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
-            Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats();
-            if (!scans.isEmpty()) {
-                StringBuilder builder = new StringBuilder();
-                scans.forEach(scanNode -> builder.append(scanNode));
-                throw new AnalysisException("tables with unknown column stats: 
" + builder);
-            }
-        }
         return rootFragment;
     }
 
@@ -543,15 +530,6 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         // TODO: move all node set cardinality into one place
         if (olapScan.getStats() != null) {
             olapScanNode.setCardinality((long) 
olapScan.getStats().getRowCount());
-            if 
(ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
-                for (int i = 0; i < slots.size(); i++) {
-                    Slot slot = slots.get(i);
-                    if 
(olapScan.getStats().findColumnStatistics(slot).isUnKnown()
-                            && !isComplexDataType(slot.getDataType())) {
-                        context.addUnknownStatsColumn(olapScanNode, 
tupleDescriptor.getSlots().get(i).getId());
-                    }
-                }
-            }
         }
         // TODO: Do we really need tableName here?
         TableName tableName = new TableName(null, "", "");
@@ -2000,14 +1978,6 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
             scanNode.getTupleDesc().getSlots().add(smallest);
         }
         try {
-            if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
-                for (SlotId slotId : requiredByProjectSlotIdSet) {
-                    if (context.isColumnStatsUnknown(scanNode, slotId)) {
-                        throw new AnalysisException("meet unknown column stats 
on table " + scanNode);
-                    }
-                }
-                context.removeScanFromStatsUnknownColumnsMap(scanNode);
-            }
             scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet);
         } catch (UserException e) {
             Util.logAndThrowRuntimeException(LOG,
@@ -2270,9 +2240,4 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         }
         return outputExprs;
     }
-
-    private boolean isComplexDataType(DataType dataType) {
-        return dataType instanceof ArrayType || dataType instanceof MapType || 
dataType instanceof JsonType
-                || dataType instanceof StructType;
-    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
index e69b5ee8ef3..256b37d7057 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
@@ -45,13 +45,11 @@ import org.apache.doris.thrift.TPushAggOp;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Set;
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 
@@ -92,7 +90,6 @@ public class PlanTranslatorContext {
     private final Map<CTEId, PhysicalCTEProducer> cteProducerMap = 
Maps.newHashMap();
 
     private final Map<RelationId, TPushAggOp> tablePushAggOp = 
Maps.newHashMap();
-    private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap = 
Maps.newHashMap();
 
     public PlanTranslatorContext(CascadesContext ctx) {
         this.translator = new 
RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
@@ -103,34 +100,6 @@ public class PlanTranslatorContext {
         translator = null;
     }
 
-    /**
-     * remember the unknown-stats column and its scan, used for 
forbid_unknown_col_stats check
-     */
-    public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
-        Set<SlotId> slots = statsUnknownColumnsMap.get(scan);
-        if (slots == null) {
-            statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId));
-        } else {
-            statsUnknownColumnsMap.get(scan).add(slotId);
-        }
-    }
-
-    public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
-        Set<SlotId> unknownSlots = statsUnknownColumnsMap.get(scan);
-        if (unknownSlots == null) {
-            return false;
-        }
-        return unknownSlots.contains(slotId);
-    }
-
-    public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
-        statsUnknownColumnsMap.remove(scan);
-    }
-
-    public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
-        return statsUnknownColumnsMap.keySet();
-    }
-
     public List<PlanFragment> getPlanFragments() {
         return planFragments;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
index 71818966696..cc45952817a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
@@ -59,12 +59,12 @@ public class PredicatePropagation {
     }
 
     /**
-     * Use the left or right child of `equalExpr` to replace the left or right 
child of `expression`
+     * Use the left or right child of `leftSlotEqualToRightSlot` to replace 
the left or right child of `expression`
      * Now only support infer `ComparisonPredicate`.
      * TODO: We should determine whether `expression` satisfies the condition 
for replacement
      *       eg: Satisfy `expression` is non-deterministic
      */
-    private Expression doInfer(Expression equalExpr, Expression expression) {
+    private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression 
expression) {
         return expression.accept(new DefaultExpressionRewriter<Void>() {
 
             @Override
@@ -76,43 +76,36 @@ public class PredicatePropagation {
             public Expression visitComparisonPredicate(ComparisonPredicate cp, 
Void context) {
                 // we need to get expression covered by cast, because we want 
to infer different datatype
                 if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left()) 
&& (cp.right().isConstant())) {
-                    return replaceSlot(cp, 
ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr);
+                    return replaceSlot(cp, 
ExpressionUtils.getDatatypeCoveredByCast(cp.left()));
                 } else if 
(ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) && 
cp.left().isConstant()) {
-                    return replaceSlot(cp, 
ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr);
+                    return replaceSlot(cp, 
ExpressionUtils.getDatatypeCoveredByCast(cp.right()));
                 }
                 return super.visit(cp, context);
             }
 
             private boolean isDataTypeValid(DataType originDataType, 
Expression expr) {
-                if ((expr.child(0).getDataType() instanceof IntegralType)
-                        && (expr.child(1).getDataType() instanceof 
IntegralType)
+                if ((leftSlotEqualToRightSlot.child(0).getDataType() 
instanceof IntegralType)
+                        && (leftSlotEqualToRightSlot.child(1).getDataType() 
instanceof IntegralType)
                                 && (originDataType instanceof IntegralType)) {
                     // infer filter can not be lower than original datatype, 
or dataset would be wrong
                     if (!((IntegralType) originDataType).widerThan(
-                            (IntegralType) expr.child(0).getDataType())
+                            (IntegralType) 
leftSlotEqualToRightSlot.child(0).getDataType())
                                     && !((IntegralType) 
originDataType).widerThan(
-                                            (IntegralType) 
expr.child(1).getDataType())) {
+                                            (IntegralType) 
leftSlotEqualToRightSlot.child(1).getDataType())) {
                         return true;
                     }
-                } else if 
(expr.child(0).getDataType().equals(expr.child(1).getDataType())) {
-                    return true;
                 }
                 return false;
             }
 
-            private Expression replaceSlot(Expression sourcePredicate, 
DataType originDataType, Expression equal) {
-                if (!isDataTypeValid(originDataType, equal)) {
-                    return sourcePredicate;
-                }
-                return sourcePredicate.rewriteUp(e -> {
-                    // we can not replace Cast expression to slot because when 
rewrite up, we have replace child of cast
-                    if (e instanceof Cast) {
-                        return e;
-                    }
-                    if (ExpressionUtils.isTwoExpressionEqualWithCast(e, 
equal.child(0))) {
-                        return equal.child(1);
-                    } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, 
equal.child(1))) {
-                        return equal.child(0);
+            private Expression replaceSlot(Expression expr, DataType 
originDataType) {
+                return expr.rewriteUp(e -> {
+                    if (isDataTypeValid(originDataType, 
leftSlotEqualToRightSlot)) {
+                        if (ExpressionUtils.isTwoExpressionEqualWithCast(e, 
leftSlotEqualToRightSlot.child(0))) {
+                            return leftSlotEqualToRightSlot.child(1);
+                        } else if 
(ExpressionUtils.isTwoExpressionEqualWithCast(e, 
leftSlotEqualToRightSlot.child(1))) {
+                            return leftSlotEqualToRightSlot.child(0);
+                        }
                     }
                     return e;
                 });
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 24ec929e820..45aeae54fde 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -26,6 +26,7 @@ import org.apache.doris.common.Config;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.memo.Group;
 import org.apache.doris.nereids.memo.GroupExpression;
 import org.apache.doris.nereids.trees.expressions.Alias;
@@ -122,6 +123,7 @@ import org.apache.doris.statistics.StatisticConstants;
 import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
+import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
@@ -621,21 +623,46 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                         
.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
                         .build();
             }
-            if (!cache.isUnKnown) {
-                rowCount = Math.max(rowCount, cache.count);
-                Histogram histogram = getColumnHistogram(table, colName);
-                if (histogram != null) {
-                    ColumnStatisticBuilder columnStatisticBuilder =
-                            new 
ColumnStatisticBuilder(cache).setHistogram(histogram);
-                    cache = columnStatisticBuilder.build();
-                    if 
(ConnectContext.get().getSessionVariable().isEnableMinidump()
-                            && 
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
-                        totalColumnStatisticMap.put(table.getName() + ":" + 
colName, cache);
-                        totalHistogramMap.put(table.getName() + colName, 
histogram);
+            if (cache.isUnKnown) {
+                if (forbidUnknownColStats && !shouldIgnoreThisCol) {
+                    if (StatisticsUtil.statsTblAvailable()) {
+                        throw new AnalysisException(String.format("Found 
unknown stats for column:%s.%s.\n"
+                                + "It may caused by:\n"
+                                + "\n"
+                                + "1. This column never got analyzed\n"
+                                + "2. This table is empty\n"
+                                + "3. Stats load failed caused by unstable of 
backends,"
+                                + "and FE cached the unknown stats by default 
in this scenario\n"
+                                + "4. There is a bug, please report it to 
Doris community\n"
+                                + "\n"
+                                + "If an unknown stats for this column is 
tolerable,"
+                                + "you could set session variable 
`forbid_unknown_col_stats` to false to make planner"
+                                + " ignore this error and keep planning.", 
table.getName(), colName));
+                    } else {
+                        throw new AnalysisException("BE is not available!");
                     }
                 }
+                columnStatisticMap.put(slotReference, cache);
+                continue;
+            }
+            rowCount = Math.max(rowCount, cache.count);
+            Histogram histogram = getColumnHistogram(table, colName);
+            if (histogram != null) {
+                ColumnStatisticBuilder columnStatisticBuilder =
+                        new 
ColumnStatisticBuilder(cache).setHistogram(histogram);
+                columnStatisticMap.put(slotReference, 
columnStatisticBuilder.build());
+                cache = columnStatisticBuilder.build();
+                if 
(ConnectContext.get().getSessionVariable().isEnableMinidump()
+                        && 
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+                    totalHistogramMap.put(table.getName() + ":" + colName, 
histogram);
+                }
             }
             columnStatisticMap.put(slotReference, cache);
+            if (ConnectContext.get().getSessionVariable().isEnableMinidump()
+                    && 
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+                totalColumnStatisticMap.put(table.getName() + ":" + colName, 
cache);
+                totalHistogramMap.put(table.getName() + colName, histogram);
+            }
         }
         return new Statistics(rowCount, columnStatisticMap);
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
index b7b235d2b43..adc67ca835f 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
@@ -17,33 +17,15 @@
 
 package org.apache.doris.nereids.rules.rewrite;
 
-import org.apache.doris.nereids.trees.expressions.Cast;
-import org.apache.doris.nereids.trees.expressions.EqualTo;
-import org.apache.doris.nereids.trees.expressions.Expression;
-import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.plans.JoinType;
-import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
-import org.apache.doris.nereids.types.BigIntType;
 import org.apache.doris.nereids.util.MemoPatternMatchSupported;
 import org.apache.doris.nereids.util.PlanChecker;
-import org.apache.doris.nereids.util.PlanConstructor;
 import org.apache.doris.utframe.TestWithFeService;
 
-import com.google.common.collect.Sets;
-import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
-import java.util.Optional;
-import java.util.Set;
-
 public class InferPredicatesTest extends TestWithFeService implements 
MemoPatternMatchSupported {
 
-    private final LogicalOlapScan scan1 = 
PlanConstructor.newLogicalOlapScan(0, "t1", 0);
-
-    private final LogicalOlapScan scan2 = 
PlanConstructor.newLogicalOlapScan(1, "t2", 0);
-
-    private final PredicatePropagation propagation = new 
PredicatePropagation();
-
     @Override
     protected void runBeforeAll() throws Exception {
         createDatabase("test");
@@ -646,16 +628,4 @@ public class InferPredicatesTest extends TestWithFeService 
implements MemoPatter
                         ).when(join -> join.getJoinType() == 
JoinType.LEFT_OUTER_JOIN)
                 );
     }
-
-    @Test
-    void testInfer() {
-        EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0), 
BigIntType.INSTANCE), Literal.of(1));
-        EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0), 
scan1.getOutput().get(0));
-        Set<Expression> predicates = Sets.newHashSet();
-        predicates.add(equalTo2);
-        predicates.add(equalTo);
-        Set<Expression> newPredicates = propagation.infer(predicates);
-        Optional<Expression> newPredicate = newPredicates.stream().findFirst();
-        Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new 
Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1))));
-    }
 }
diff --git 
a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy 
b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
index 120c9a8f674..a1621f1c239 100644
--- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
+++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
@@ -22,8 +22,6 @@ suite("test_infer_predicate") {
     sql 'drop table if exists infer_tb1;'
     sql 'drop table if exists infer_tb2;'
     sql 'drop table if exists infer_tb3;'
-    sql 'drop table if exists infer_tb4;'
-    sql 'drop table if exists infer_tb5;'
 
     sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1) 
buckets 3 properties('replication_num' = '1');'''
 
@@ -31,10 +29,6 @@ suite("test_infer_predicate") {
 
     sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by 
hash(k1) buckets 3 properties('replication_num' = '1');'''
 
-    sql '''create table infer_tb4 (k1 varchar(100), k2 date) distributed by 
hash(k1) buckets 3 properties('replication_num' = '1');'''
-
-    sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by 
hash(k1) buckets 3 properties('replication_num' = '1');'''
-
     explain {
         sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = 
infer_tb1.k2  and infer_tb2.k1 = 1;"
         contains "PREDICATES: k2"
@@ -61,16 +55,4 @@ suite("test_infer_predicate") {
         contains "PREDICATES: k3"
         contains "PREDICATES: k2"
     }
-
-    explain {
-        sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 = 
infer_tb5.k3 where infer_tb4.k2 = '20230901';"
-        contains "PREDICATES: k3"
-        contains "PREDICATES: k2"
-    }
-
-    sql 'drop table if exists infer_tb1;'
-    sql 'drop table if exists infer_tb2;'
-    sql 'drop table if exists infer_tb3;'
-    sql 'drop table if exists infer_tb4;'
-    sql 'drop table if exists infer_tb5;'
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to