jackwener commented on code in PR #20209:
URL: https://github.com/apache/doris/pull/20209#discussion_r1211559073
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java:
##########
@@ -335,58 +336,106 @@ private void makeLogicalExpression(Group root) {
}
}
+ /**
+ * The top project of (T1, T2, T3) is different after reorder,
+ * so we need to merge Project1 and Project2 into Project4 after reorder.
+ * T1 join T2 join T3:
+ * Project1(a, e + f)
+ * join(a = e)
+ * Project2(a, b + d as e)
+ * join(a = c)
+ * T1(a, b)
+ * T2(c, d)
+ * T3(e, f)
+ *
+ * after reorder:
+ * T1 join T3 join T2:
+ * Project4(a, b + d + f)
+ * join(a = c)
+ * Project3(a, b, f)
+ * join(a = e)
+ * T1(a, b)
+ * T3(e, f)
+ * T2(c, d)
+ */
+ private List<NamedExpression> mergeProjections(List<NamedExpression>
childProjects,
+ List<NamedExpression> parentProjects) {
+ Map<Expression, Alias> replaceMap = childProjects.stream().filter(e ->
e instanceof Alias)
+ .collect(Collectors.toMap(NamedExpression::toSlot, e ->
(Alias) e));
+ return parentProjects.stream().map(expr -> {
+ if (expr instanceof Alias) {
+ Alias alias = (Alias) expr;
+ Expression insideExpr = alias.child();
+ Expression newInsideExpr = insideExpr.rewriteUp(e -> {
+ Alias getAlias = replaceMap.get(e);
+ return getAlias == null ? e : getAlias.child();
+ });
+ return newInsideExpr == insideExpr ? expr
+ : alias.withChildren(ImmutableList.of(newInsideExpr));
+ } else {
+ Alias getAlias = replaceMap.get(expr);
+ return getAlias == null ? expr : getAlias;
+ }
+ }).collect(ImmutableList.toImmutableList());
+ }
+
private List<Plan> proposeProject(List<Plan> allChild, List<Edge> edges,
long left, long right) {
long fullKey = LongBitmap.newBitmapUnion(left, right);
List<Slot> outputs = allChild.get(0).getOutput();
Set<Slot> outputSet = allChild.get(0).getOutputSet();
- if (!projectsOnSubgraph.containsKey(fullKey)) {
- List<NamedExpression> projects = new ArrayList<>();
- // Calculate complex expression
- List<Long> bitmaps = complexProjectMap.keySet().stream()
- .filter(bitmap -> LongBitmap.isSubset(bitmap,
fullKey)).collect(Collectors.toList());
-
- for (long bitmap : bitmaps) {
- projects.addAll(complexProjectMap.get(bitmap));
- complexProjectMap.remove(bitmap);
+ List<NamedExpression> allProjects = Lists.newArrayList();
+
+ List<NamedExpression> complexProjects = new ArrayList<>();
+ // Collect the complex expressions that must be computed by the current (fullKey) node:
+ // the complex projects of fullKey itself, plus any complex project covered by fullKey
+ // but not covered by left or right alone. When left == right (fullKey is not a join node),
+ // every complex project covered by fullKey is collected.
+ List<Long> bitmaps = complexProjectMap.keySet().stream().filter(bitmap
-> LongBitmap
+ .isSubset(bitmap, fullKey)
+ && ((!LongBitmap.isSubset(bitmap, left) &&
!LongBitmap.isSubset(bitmap, right))
+ || left == right))
+ .collect(Collectors.toList());
+
+ // complexProjectMap is created by a bottom-up traversal of the join tree,
+ // so child nodes come before parent nodes in the bitmaps
+ for (long bitmap : bitmaps) {
+ if (complexProjects.isEmpty()) {
+ complexProjects = complexProjectMap.get(bitmap);
+ } else {
+ complexProjects =
+ mergeProjections(complexProjects,
complexProjectMap.get(bitmap));
}
+ }
+ allProjects.addAll(complexProjects);
- // calculate required columns
- Set<Slot> requireSlots = calculateRequiredSlots(left, right,
edges);
- outputs.stream()
- .filter(e -> requireSlots.contains(e))
- .forEach(e -> projects.add(e));
+ // calculate required columns by all parents
+ Set<Slot> requireSlots = calculateRequiredSlots(left, right, edges);
- // propose physical project
- if (projects.isEmpty()) {
- projects.add(ExpressionUtils.selectMinimumColumn(outputs));
- }
- projectsOnSubgraph.put(fullKey, projects);
+ // add output slots belong to required slots to project list
+ allProjects.addAll(outputs.stream().filter(e ->
requireSlots.contains(e))
+ .collect(Collectors.toList()));
+
+ // propose physical project
+ if (allProjects.isEmpty()) {
+ allProjects.add(ExpressionUtils.selectMinimumColumn(outputs));
}
- List<NamedExpression> allProjects = projectsOnSubgraph.get(fullKey);
if (outputSet.equals(new HashSet<>(allProjects))) {
return allChild;
}
- while (true) {
- Set<Slot> childOutputSet = allChild.get(0).getOutputSet();
- List<NamedExpression> projects = allProjects.stream()
- .filter(expr ->
- childOutputSet.containsAll(expr.getInputSlots())
|| childOutputSet.contains(expr.toSlot()))
+
+ Set<Slot> childOutputSet = allChild.get(0).getOutputSet();
+ List<NamedExpression> projects = allProjects.stream()
+ .filter(expr ->
+ childOutputSet.containsAll(expr.getInputSlots()))
+ .collect(Collectors.toList());
+ if (!outputSet.equals(new HashSet<>(projects))) {
+ LogicalProperties projectProperties = new LogicalProperties(
+ () -> projects.stream().map(p ->
p.toSlot()).collect(Collectors.toList()));
+ allChild = allChild.stream()
+ .map(c -> new PhysicalProject<>(projects,
projectProperties, c))
.collect(Collectors.toList());
- if (!outputSet.equals(new HashSet<>(projects))) {
- LogicalProperties projectProperties = new LogicalProperties(
- () -> projects.stream().map(p ->
p.toSlot()).collect(Collectors.toList()));
- allChild = allChild.stream()
- .map(c -> new PhysicalProject<>(projects,
projectProperties, c))
- .collect(Collectors.toList());
- }
- if (projects.size() == 0) {
- throw new RuntimeException("dphyer fail process project");
- }
- if (projects.size() == allProjects.size()) {
- break;
- }
}
+ Preconditions.checkArgument(!projects.isEmpty() && projects.size() ==
allProjects.size());
Review Comment:
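Use `Preconditions.checkState` here instead of `checkArgument`: this condition validates internal state, not a method argument.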
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]