This is an automated email from the ASF dual-hosted git repository.

huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a49bde8a71 [fix](Nereids)statistics calculator for Project and 
Aggregate lost some columns (#12196)
a49bde8a71 is described below

commit a49bde8a718ec96f107ac7d44dda8a9017f7ebc4
Author: morrySnow <[email protected]>
AuthorDate: Wed Aug 31 20:47:22 2022 +0800

    [fix](Nereids)statistics calculator for Project and Aggregate lost some 
columns (#12196)
    
    There are some bugs in Nereids' StatsCalculator.
    
    1. Project: return child column stats directly, so its parents cannot find 
column stats from project's slot.
    2. Aggregate: do not return column that is Alias, its parents cannot find 
some column stats from Aggregate's slot.
    3. All: use SlotReference as key of column to stats map. So we need change 
SlotReference's equals and hashCode method to just using ExprId as we discussed.
---
 .../doris/nereids/stats/StatsCalculator.java       | 54 ++++++++++++----------
 .../nereids/trees/expressions/SlotReference.java   |  9 ++--
 .../org/apache/doris/statistics/ColumnStats.java   |  9 ++++
 .../doris/nereids/trees/plans/PlanEqualsTest.java  | 14 +++---
 4 files changed, 49 insertions(+), 37 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 286dc12eff..50052a959b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -59,6 +59,9 @@ import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatsDeriveResult;
 import org.apache.doris.statistics.TableStats;
 
+import com.google.common.collect.Maps;
+
+import java.util.AbstractMap;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -250,46 +253,49 @@ public class StatsCalculator extends 
DefaultPlanVisitor<StatsDeriveResult, Void>
     }
 
     private StatsDeriveResult computeAggregate(Aggregate aggregate) {
-        List<Expression> groupByExprList = aggregate.getGroupByExpressions();
+        List<Expression> groupByExpressions = 
aggregate.getGroupByExpressions();
         StatsDeriveResult childStats = groupExpression.getCopyOfChildStats(0);
-        Map<Slot, ColumnStats> childSlotColumnStatsMap = 
childStats.getSlotToColumnStats();
+        Map<Slot, ColumnStats> childSlotToColumnStats = 
childStats.getSlotToColumnStats();
         long resultSetCount = 1;
-        for (Expression expression : groupByExprList) {
-            List<SlotReference> slotRefList = 
expression.collect(SlotReference.class::isInstance);
+        for (Expression groupByExpression : groupByExpressions) {
+            List<SlotReference> slotReferences = 
groupByExpression.collect(SlotReference.class::isInstance);
             // TODO: Support more complex group expr.
             //       For example:
             //              select max(col1+col3) from t1 group by col1+col3;
-            if (slotRefList.size() != 1) {
+            if (slotReferences.size() != 1) {
                 continue;
             }
-            SlotReference slotRef = slotRefList.get(0);
-            ColumnStats columnStats = childSlotColumnStatsMap.get(slotRef);
+            SlotReference slotReference = slotReferences.get(0);
+            ColumnStats columnStats = 
childSlotToColumnStats.get(slotReference);
             resultSetCount *= columnStats.getNdv();
         }
-        Map<Slot, ColumnStats> slotColumnStatsMap = new HashMap<>();
-        List<NamedExpression> namedExpressionList = 
aggregate.getOutputExpressions();
+        Map<Slot, ColumnStats> slotToColumnStats = Maps.newHashMap();
+        List<NamedExpression> outputExpressions = 
aggregate.getOutputExpressions();
         // TODO: 1. Estimate the output unit size by the type of corresponding 
AggregateFunction
         //       2. Handle alias, literal in the output expression list
-        for (NamedExpression namedExpression : namedExpressionList) {
-            if (namedExpression instanceof SlotReference) {
-                slotColumnStatsMap.put((SlotReference) namedExpression, new 
ColumnStats());
-            }
+        for (NamedExpression outputExpression : outputExpressions) {
+            slotToColumnStats.put(outputExpression.toSlot(), new 
ColumnStats());
         }
-        StatsDeriveResult statsDeriveResult = new 
StatsDeriveResult(resultSetCount, slotColumnStatsMap);
+        StatsDeriveResult statsDeriveResult = new 
StatsDeriveResult(resultSetCount, slotToColumnStats);
         // TODO: Update ColumnStats properly, add new mapping from output slot 
to ColumnStats
         return statsDeriveResult;
     }
 
-    // TODO: Update data size and min/max value.
+    // TODO: do real project on column stats
     private StatsDeriveResult computeProject(Project project) {
-        List<NamedExpression> namedExpressionList = project.getProjects();
-        List<Slot> slotSet = 
namedExpressionList.stream().flatMap(namedExpression -> {
-            List<Slot> slotReferenceList = 
namedExpression.collect(SlotReference.class::isInstance);
-            return slotReferenceList.stream();
-        }).collect(Collectors.toList());
-        StatsDeriveResult stat = groupExpression.getCopyOfChildStats(0);
-        Map<Slot, ColumnStats> slotColumnStatsMap = 
stat.getSlotToColumnStats();
-        slotColumnStatsMap.entrySet().removeIf(entry -> 
!slotSet.contains(entry.getKey()));
-        return stat;
+        List<NamedExpression> projections = project.getProjects();
+        StatsDeriveResult statsDeriveResult = 
groupExpression.getCopyOfChildStats(0);
+        Map<Slot, ColumnStats> childColumnStats = 
statsDeriveResult.getSlotToColumnStats();
+        Map<Slot, ColumnStats> columnsStats = 
projections.stream().map(projection -> {
+            List<SlotReference> slotReferences = 
projection.collect(SlotReference.class::isInstance);
+            if (slotReferences.isEmpty()) {
+                return new AbstractMap.SimpleEntry<>(projection.toSlot(), 
ColumnStats.createDefaultColumnStats());
+            } else {
+                // TODO: just a trick here, need to do real project on column 
stats
+                return new AbstractMap.SimpleEntry<>(projection.toSlot(), 
childColumnStats.get(slotReferences.get(0)));
+            }
+        }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+        statsDeriveResult.setSlotToColumnStats(columnsStats);
+        return statsDeriveResult;
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java
index 24a601a2e5..cc77e75f7f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java
@@ -116,16 +116,13 @@ public class SlotReference extends Slot {
             return false;
         }
         SlotReference that = (SlotReference) o;
-        return nullable == that.nullable
-                && dataType.equals(that.dataType)
-                && exprId.equals(that.exprId)
-                && name.equals(that.name)
-                && qualifier.equals(that.qualifier);
+        return exprId.equals(that.exprId);
+
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(exprId, name, qualifier, nullable);
+        return Objects.hash(exprId);
     }
 
     public Column getColumn() {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java
index 30fb619630..cd28ea263d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java
@@ -76,6 +76,15 @@ public class ColumnStats {
     private LiteralExpr minValue;
     private LiteralExpr maxValue;
 
+    public static ColumnStats createDefaultColumnStats() {
+        ColumnStats columnStats = new ColumnStats();
+        columnStats.setAvgSize(1);
+        columnStats.setMaxSize(1);
+        columnStats.setNdv(1);
+        columnStats.setNumNulls(0);
+        return columnStats;
+    }
+
     public ColumnStats(ColumnStats other) {
         this.ndv = other.ndv;
         this.avgSize = other.avgSize;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanEqualsTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanEqualsTest.java
index a9e9c6c094..1fd9680a03 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanEqualsTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanEqualsTest.java
@@ -65,7 +65,7 @@ public class PlanEqualsTest {
         Assertions.assertEquals(expected, actual);
 
         LogicalAggregate<Plan> unexpected = new 
LogicalAggregate<>(Lists.newArrayList(), ImmutableList.of(
-                new SlotReference(new ExprId(0), "b", BigIntType.INSTANCE, 
true, Lists.newArrayList())),
+                new SlotReference(new ExprId(1), "b", BigIntType.INSTANCE, 
true, Lists.newArrayList())),
                 child);
         Assertions.assertNotEquals(unexpected, actual);
     }
@@ -95,8 +95,8 @@ public class PlanEqualsTest {
         Assertions.assertEquals(expected, actual);
 
         LogicalJoin<Plan, Plan> unexpected = new 
LogicalJoin<>(JoinType.INNER_JOIN, Lists.newArrayList(new EqualTo(
-                new SlotReference("a", BigIntType.INSTANCE, false, 
Lists.newArrayList()),
-                new SlotReference("b", BigIntType.INSTANCE, true, 
Lists.newArrayList()))),
+                new SlotReference(new ExprId(2), "a", BigIntType.INSTANCE, 
false, Lists.newArrayList()),
+                new SlotReference(new ExprId(3), "b", BigIntType.INSTANCE, 
true, Lists.newArrayList()))),
                 Optional.empty(), left, right);
         Assertions.assertNotEquals(unexpected, actual);
     }
@@ -151,7 +151,7 @@ public class PlanEqualsTest {
 
         LogicalSort<Plan> unexpected = new LogicalSort<>(
                 ImmutableList.of(new OrderKey(
-                        new SlotReference(new ExprId(1), "a", 
BigIntType.INSTANCE, true, Lists.newArrayList()), true,
+                        new SlotReference(new ExprId(2), "a", 
BigIntType.INSTANCE, true, Lists.newArrayList()), true,
                         true)),
                 child);
         Assertions.assertNotEquals(unexpected, actual);
@@ -211,8 +211,8 @@ public class PlanEqualsTest {
 
         PhysicalHashJoin<Plan, Plan> unexpected = new 
PhysicalHashJoin<>(JoinType.INNER_JOIN,
                 Lists.newArrayList(new EqualTo(
-                        new SlotReference("a", BigIntType.INSTANCE, false, 
Lists.newArrayList()),
-                        new SlotReference("b", BigIntType.INSTANCE, true, 
Lists.newArrayList()))),
+                        new SlotReference(new ExprId(2), "a", 
BigIntType.INSTANCE, false, Lists.newArrayList()),
+                        new SlotReference(new ExprId(3), "b", 
BigIntType.INSTANCE, true, Lists.newArrayList()))),
                 Optional.empty(), logicalProperties, left, right);
         Assertions.assertNotEquals(unexpected, actual);
     }
@@ -288,7 +288,7 @@ public class PlanEqualsTest {
 
         PhysicalQuickSort<Plan> unexpected = new PhysicalQuickSort<>(
                 ImmutableList.of(new OrderKey(
-                        new SlotReference(new ExprId(1), "a", 
BigIntType.INSTANCE, true, Lists.newArrayList()), true,
+                        new SlotReference(new ExprId(2), "a", 
BigIntType.INSTANCE, true, Lists.newArrayList()), true,
                         true)),
                 logicalProperties,
                 child);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to