Airblader commented on a change in pull request #17011:
URL: https://github.com/apache/flink/pull/17011#discussion_r698432369



##########
File path: 
flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/rules/physical/stream/PushFilterPastChangelogNormalizeRule.java
##########
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.plan.rules.physical.stream;
+
+import org.apache.flink.annotation.Internal;
+import 
org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalCalc;
+import 
org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalChangelogNormalize;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexLocalRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexProgram;
+import org.apache.calcite.rex.RexProgramBuilder;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static 
org.apache.flink.table.planner.plan.utils.RexNodeExtractor.extractRefInputFields;
+
+/**
+ * Pushes primary key filters through a {@link 
StreamPhysicalChangelogNormalize ChangelogNormalize}
+ * operator to reduce its state size.
+ *
+ * <p>This rule looks for Calc → ChangelogNormalize where the {@link 
StreamPhysicalCalc Calc}
+ * contains a filter condition. The condition is transformed into CNF and then 
each conjunction is
+ * tested for whether it affects only primary key columns. If such conditions 
exist, they are moved
+ * into a new, separate Calc and pushed through the ChangelogNormalize 
operator. ChangelogNormalize
+ * keeps state for every unique key it encounters, thus pushing filters on the 
primary key in front
+ * of it helps reduce the size of its state.
+ *
+ * <p>Note that pushing primary key filters is safe to do, but pushing any 
other filters would lead
+ * to incorrect results.

Review comment:
       I'll settle for "… can lead to …" which adds a bit less noise in the 
sentence (a system that is only broken in some scenarios is broken overall, 
IMO).

##########
File path: 
flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/rules/physical/stream/PushFilterPastChangelogNormalizeRule.java
##########
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.plan.rules.physical.stream;
+
+import org.apache.flink.annotation.Internal;
+import 
org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalCalc;
+import 
org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalChangelogNormalize;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexLocalRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexProgram;
+import org.apache.calcite.rex.RexProgramBuilder;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static 
org.apache.flink.table.planner.plan.utils.RexNodeExtractor.extractRefInputFields;
+
+/**
+ * Pushes primary key filters through a {@link 
StreamPhysicalChangelogNormalize ChangelogNormalize}
+ * operator to reduce its state size.
+ *
+ * <p>This rule looks for Calc → ChangelogNormalize where the {@link 
StreamPhysicalCalc Calc}
+ * contains a filter condition. The condition is transformed into CNF and then 
each conjunction is
+ * tested for whether it affects only primary key columns. If such conditions 
exist, they are moved
+ * into a new, separate Calc and pushed through the ChangelogNormalize 
operator. ChangelogNormalize
+ * keeps state for every unique key it encounters, thus pushing filters on the 
primary key in front
+ * of it helps reduce the size of its state.
+ *
+ * <p>Note that pushing primary key filters is safe to do, but pushing any 
other filters would lead
+ * to incorrect results.
+ */
+@Internal
+public class PushFilterPastChangelogNormalizeRule
+        extends RelRule<PushFilterPastChangelogNormalizeRule.Config> {
+
+    public static final RelOptRule INSTANCE =
+            
Config.EMPTY.as(Config.class).onFilterWithChangelogNormalize().toRule();
+
+    public PushFilterPastChangelogNormalizeRule(Config config) {
+        super(config);
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+        final StreamPhysicalCalc calc = call.rel(0);
+        final StreamPhysicalChangelogNormalize changelogNormalize = 
call.rel(1);
+
+        final RexProgram program = calc.getProgram();
+        final RexNode condition =
+                RexUtil.toCnf(
+                        call.builder().getRexBuilder(),
+                        program.expandLocalRef(program.getCondition()));
+
+        final Set<Integer> primaryKeyIndices =
+                
IntStream.of(changelogNormalize.uniqueKeys()).boxed().collect(Collectors.toSet());
+
+        // Determine which filters can be pushed (= involve only primary key 
columns)
+        final List<RexNode> primaryKeyPredicates = new ArrayList<>();
+        final List<RexNode> otherPredicates = new ArrayList<>();
+        partitionPrimaryKeyPredicates(
+                RelOptUtil.conjunctions(condition),
+                primaryKeyIndices,
+                primaryKeyPredicates,
+                otherPredicates);
+
+        // Construct a new ChangelogNormalize which has primary key filters 
pushed into it
+        final StreamPhysicalChangelogNormalize newChangelogNormalize =
+                pushFiltersThroughChangeNormalize(call, primaryKeyPredicates);
+
+        // Retain only filters which haven't been pushed
+        transformWithRemainingPredicates(call, newChangelogNormalize, 
otherPredicates);
+    }
+
+    /**
+     * Separates the given {@param predicates} into filters which affect only 
the primary key and
+     * anything else.
+     */
+    private void partitionPrimaryKeyPredicates(
+            List<RexNode> predicates,
+            Set<Integer> primaryKeyIndices,
+            List<RexNode> primaryKeyPredicates,
+            List<RexNode> remainingPredicates) {
+        for (RexNode predicate : predicates) {
+            int[] inputRefs = 
extractRefInputFields(Collections.singletonList(predicate));
+            if 
(Arrays.stream(inputRefs).allMatch(primaryKeyIndices::contains)) {

Review comment:
       Yeah, I mean I don't see any issues with that, anyway. `a = 1 && b = 42` 
is equivalent to `a = 1 && b = 42 && c = c`, right?

##########
File path: 
flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/rules/physical/stream/PushFilterPastChangelogNormalizeRule.java
##########
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.plan.rules.physical.stream;
+
+import org.apache.flink.annotation.Internal;
+import 
org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalCalc;
+import 
org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalChangelogNormalize;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexLocalRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexProgram;
+import org.apache.calcite.rex.RexProgramBuilder;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static 
org.apache.flink.table.planner.plan.utils.RexNodeExtractor.extractRefInputFields;
+
+/**
+ * Pushes primary key filters through a {@link 
StreamPhysicalChangelogNormalize ChangelogNormalize}
+ * operator to reduce its state size.
+ *
+ * <p>This rule looks for Calc → ChangelogNormalize where the {@link 
StreamPhysicalCalc Calc}
+ * contains a filter condition. The condition is transformed into CNF and then 
each conjunction is
+ * tested for whether it affects only primary key columns. If such conditions 
exist, they are moved
+ * into a new, separate Calc and pushed through the ChangelogNormalize 
operator. ChangelogNormalize
+ * keeps state for every unique key it encounters, thus pushing filters on the 
primary key in front
+ * of it helps reduce the size of its state.
+ *
+ * <p>Note that pushing primary key filters is safe to do, but pushing any 
other filters would lead
+ * to incorrect results.
+ */
+@Internal
+public class PushFilterPastChangelogNormalizeRule
+        extends RelRule<PushFilterPastChangelogNormalizeRule.Config> {
+
+    public static final RelOptRule INSTANCE =
+            
Config.EMPTY.as(Config.class).onFilterWithChangelogNormalize().toRule();
+
+    public PushFilterPastChangelogNormalizeRule(Config config) {
+        super(config);
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+        final StreamPhysicalCalc calc = call.rel(0);
+        final StreamPhysicalChangelogNormalize changelogNormalize = 
call.rel(1);
+
+        final RexProgram program = calc.getProgram();
+        final RexNode condition =
+                RexUtil.toCnf(
+                        call.builder().getRexBuilder(),
+                        program.expandLocalRef(program.getCondition()));
+
+        final Set<Integer> primaryKeyIndices =
+                
IntStream.of(changelogNormalize.uniqueKeys()).boxed().collect(Collectors.toSet());
+
+        // Determine which filters can be pushed (= involve only primary key 
columns)
+        final List<RexNode> primaryKeyPredicates = new ArrayList<>();
+        final List<RexNode> otherPredicates = new ArrayList<>();
+        partitionPrimaryKeyPredicates(
+                RelOptUtil.conjunctions(condition),
+                primaryKeyIndices,
+                primaryKeyPredicates,
+                otherPredicates);
+
+        // Construct a new ChangelogNormalize which has primary key filters 
pushed into it
+        final StreamPhysicalChangelogNormalize newChangelogNormalize =
+                pushFiltersThroughChangeNormalize(call, primaryKeyPredicates);
+
+        // Retain only filters which haven't been pushed
+        transformWithRemainingPredicates(call, newChangelogNormalize, 
otherPredicates);
+    }
+
+    /**
+     * Separates the given {@param predicates} into filters which affect only 
the primary key and
+     * anything else.
+     */
+    private void partitionPrimaryKeyPredicates(
+            List<RexNode> predicates,
+            Set<Integer> primaryKeyIndices,
+            List<RexNode> primaryKeyPredicates,
+            List<RexNode> remainingPredicates) {
+        for (RexNode predicate : predicates) {
+            int[] inputRefs = 
extractRefInputFields(Collections.singletonList(predicate));
+            if 
(Arrays.stream(inputRefs).allMatch(primaryKeyIndices::contains)) {
+                primaryKeyPredicates.add(predicate);
+            } else {
+                remainingPredicates.add(predicate);
+            }
+        }
+    }
+
+    /** Pushes {@param primaryKeyPredicates} into the {@link 
StreamPhysicalChangelogNormalize}. */
+    private StreamPhysicalChangelogNormalize pushFiltersThroughChangeNormalize(
+            RelOptRuleCall call, List<RexNode> primaryKeyPredicates) {
+        final StreamPhysicalChangelogNormalize changelogNormalize = 
call.rel(1);
+
+        if (primaryKeyPredicates.isEmpty()) {
+            // There are no filters which can be pushed, so just return the 
existing node.
+            return changelogNormalize;
+        }
+
+        final StreamPhysicalCalc pushedFiltersCalc =
+                projectIdentityWithConditions(
+                        call.builder(), changelogNormalize.getInput(), 
primaryKeyPredicates);
+
+        return (StreamPhysicalChangelogNormalize)
+                changelogNormalize.copy(
+                        changelogNormalize.getTraitSet(),
+                        Collections.singletonList(pushedFiltersCalc));
+    }
+
+    /**
+     * Returns a {@link StreamPhysicalCalc} with the given {@param conditions} 
and an identity
+     * projection.
+     */
+    private StreamPhysicalCalc projectIdentityWithConditions(
+            RelBuilder relBuilder, RelNode newInput, List<RexNode> conditions) 
{
+
+        final RexProgramBuilder programBuilder =
+                new RexProgramBuilder(newInput.getRowType(), 
relBuilder.getRexBuilder());
+        programBuilder.addIdentity();
+
+        final RexNode condition = relBuilder.and(conditions);
+        if (!condition.isAlwaysTrue()) {
+            programBuilder.addCondition(condition);
+        }
+
+        final RexProgram newProgram = programBuilder.getProgram();
+        return new StreamPhysicalCalc(
+                newInput.getCluster(),
+                newInput.getTraitSet(),
+                newInput,
+                newProgram,
+                newProgram.getOutputRowType());
+    }
+
+    /**
+     * Returns a {@link StreamPhysicalCalc} which is a copy of {@param calc}, 
but with the
+     * projections applied from {@param projectFromCalc}.
+     */
+    private StreamPhysicalCalc projectWith(
+            RelBuilder relBuilder, StreamPhysicalCalc projectFromCalc, 
StreamPhysicalCalc calc) {
+        final RexProgramBuilder programBuilder =
+                new RexProgramBuilder(calc.getRowType(), 
relBuilder.getRexBuilder());
+        if (calc.getProgram().getCondition() != null) {
+            programBuilder.addCondition(
+                    
calc.getProgram().expandLocalRef(calc.getProgram().getCondition()));
+        }
+
+        for (Pair<RexLocalRef, String> projectRef :
+                projectFromCalc.getProgram().getNamedProjects()) {
+            final RexNode project = 
projectFromCalc.getProgram().expandLocalRef(projectRef.left);
+            programBuilder.addProject(project, projectRef.right);
+        }
+
+        final RexProgram newProgram = programBuilder.getProgram();

Review comment:
       Yeah, it is using local refs again.
   
   
![screenshot-2021-08-30_14-25-10](https://user-images.githubusercontent.com/2392216/131338518-ef705c0f-71f2-426b-a875-68b4470c0b9e.png)
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to