This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new aa17d40  [Bug-fix] Update correct data partition of fragment which 
contains Repeat Node (#5910)
aa17d40 is described below

commit aa17d408658124ee78f56fc7988ef93fd0a39c2f
Author: EmmyMiao87 <[email protected]>
AuthorDate: Thu May 27 22:06:10 2021 +0800

    [Bug-fix] Update correct data partition of fragment which contains Repeat 
Node (#5910)
    
    The Repeat Node will change the data partition of fragment
      when the origin data partition of fragment is HashPartition.
    The Repeat Node will generate some new rows.
    The distribution of these new rows is completely inconsistent with the 
original data distribution,
      their distribution is RANDOM.
    
    If the data distribution is not corrected,
      an error will occur when the agg node determines whether to perform 
colocate.
    Wrong data distribution will cause the agg node to think that agg can be 
colocated,
      leading to wrong results.
    For example, the following query can not be colocated although the 
distributed column of table is k1:
    ```
    SELECT k1, k2, SUM( k3 )
    FROM table
    GROUP BY GROUPING SETS ( (k1, k2), (k1), (k2), ( ) )
    ```
---
 .../apache/doris/planner/DistributedPlanner.java   | 31 ++++++++++++++++++++++
 .../org/apache/doris/planner/PlanFragment.java     |  9 ++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java
index 8f61a49..b0126bc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java
@@ -1043,6 +1043,37 @@ public class DistributedPlanner {
             throws UserException {
         
repeatNode.setNumInstances(childFragment.getPlanRoot().getNumInstances());
         childFragment.addPlanRoot(repeatNode);
+        /*
+        The Repeat Node will change the data partition of fragment
+          when the origin data partition of fragment is HashPartition.
+        For example,
+        Query: SELECT k1, k2, sum(v1)
+               FROM table
+               GROUP BY GROUPING SETS ((k1, k2), (k1), (k2), ( ))
+        Table schema: table distributed by k1
+        The Child Fragment:
+               Fragment 0
+                   Data partition: k1
+                   Repeat Node: repeat 3 lines [[0, 1], [0], [1], []]
+                   OlapScanNode: table
+        Data before Repeat Node is partitioned by k1 such as:
+          | Node 1 |  | Node 2 |
+          | 1, 1   |  | 2, 1   |
+          | 1, 2   |  | 2, 2   |
+        Data after Repeat Node is partitioned by RANDOM such as:
+          | Node 1 |  | Node 2 |
+          | 1, 1   |  | 2, 1   |
+          | 1, 2   |  | 2, 2   |
+          | null,1 |  | null,1 |
+          | null,2 |  | null,2 |
+          ...
+        The Repeat Node will generate some new rows.
+        The distribution of these new rows is completely inconsistent with the 
original data distribution,
+          their distribution is RANDOM.
+        Therefore, the data distribution method of the fragment needs to be 
modified here.
+        Only the correct data distribution can make the correct result when 
judging **colocate**.
+         */
+        childFragment.updateDataPartition(DataPartition.RANDOM);
         return childFragment;
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanFragment.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanFragment.java
index 234e7d3..f714132 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanFragment.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanFragment.java
@@ -96,7 +96,7 @@ public class PlanFragment extends TreeNode<PlanFragment> {
     // specification of the partition of the input of this fragment;
     // an UNPARTITIONED fragment is executed on only a single node
     // TODO: improve this comment, "input" is a bit misleading
-    private final DataPartition dataPartition;
+    private DataPartition dataPartition;
 
     // specification of the actually input partition of this fragment when 
transmitting to be.
     // By default, the value of the data partition in planner and the data 
partition transmitted to be are the same.
@@ -267,6 +267,13 @@ public class PlanFragment extends TreeNode<PlanFragment> {
         return (dataPartition.getType() != TPartitionType.UNPARTITIONED);
     }
 
+    public void updateDataPartition(DataPartition dataPartition) {
+        if (this.dataPartition == DataPartition.UNPARTITIONED) {
+            return;
+        }
+        this.dataPartition = dataPartition;
+    }
+
     public PlanFragmentId getId() { return fragmentId; }
 
     public PlanFragment getDestFragment() {

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to