github-actions[bot] commented on code in PR #61584: URL: https://github.com/apache/doris/pull/61584#discussion_r2969406719
########## fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchPredicateAsVirtualColumnTest.java: ########## @@ -0,0 +1,499 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.catalog.KeysType; +import org.apache.doris.nereids.rules.exploration.join.JoinReorderContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Cast; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.GreaterThan; +import org.apache.doris.nereids.trees.expressions.IsNull; +import org.apache.doris.nereids.trees.expressions.MatchAny; +import org.apache.doris.nereids.trees.expressions.Not; +import org.apache.doris.nereids.trees.expressions.Or; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; +import org.apache.doris.nereids.trees.expressions.literal.StringLiteral; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import 
org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.MemoTestUtils; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.nereids.util.PlanConstructor; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; + +/** + * Test for PushDownMatchPredicateAsVirtualColumn rule. + */ +public class PushDownMatchPredicateAsVirtualColumnTest implements MemoPatternMatchSupported { + + /** + * Pattern 1: Filter -> Join -> Project -> OlapScan + * WHERE (CAST(name) MATCH_ANY 'hello' OR right.col IS NOT NULL) + * Should push MATCH as virtual column on scan and replace in filter predicate. 
+ */ + @Test + void testPattern1FilterJoinProjectScan() { + // Left side: scan with Project[id, CAST(name) as fn] + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + List<Slot> leftSlots = leftScan.getOutput(); + Slot idSlot = leftSlots.get(0); + Slot nameSlot = leftSlots.get(1); + + // CAST(name AS STRING) as fn — this loses originalColumn metadata on the alias slot + Cast castExpr = new Cast(nameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalOlapScan> leftProject = new LogicalProject<>( + ImmutableList.of(idSlot, fnAlias), leftScan); + + // Right side: another scan + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + Slot rightIdSlot = rightScan.getOutput().get(0); + + // Join: LEFT_OUTER + LogicalJoin<LogicalProject<LogicalOlapScan>, LogicalOlapScan> join = new LogicalJoin<>( + JoinType.LEFT_OUTER_JOIN, leftProject, rightScan, new JoinReorderContext()); + + // Filter: fn MATCH_ANY 'hello' OR rightId IS NOT NULL + Slot fnSlot = fnAlias.toSlot(); + MatchAny matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orPredicate = new Or(matchExpr, new Not(new IsNull(rightIdSlot))); + + LogicalFilter<?> filter = new LogicalFilter<>(ImmutableSet.of(orPredicate), join); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), filter) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Verify plan structure: Filter -> Join -> Project -> OlapScan + Assertions.assertInstanceOf(LogicalFilter.class, root); + LogicalFilter<?> resFilter = (LogicalFilter<?>) root; + Assertions.assertInstanceOf(LogicalJoin.class, resFilter.child()); + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) resFilter.child(); + Assertions.assertInstanceOf(LogicalProject.class, resJoin.left()); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.left(); + Assertions.assertInstanceOf(LogicalOlapScan.class, resProject.child()); + 
LogicalOlapScan resScan = (LogicalOlapScan) resProject.child(); + + // Verify virtual column was created on scan + Assertions.assertEquals(1, resScan.getVirtualColumns().size()); + Alias vcAlias = (Alias) resScan.getVirtualColumns().get(0); + Assertions.assertInstanceOf(MatchAny.class, vcAlias.child()); + + // Verify the MATCH in the virtual column uses the original CAST(name) expression, not the alias slot + MatchAny vcMatch = (MatchAny) vcAlias.child(); + Assertions.assertInstanceOf(Cast.class, vcMatch.left()); + + // Verify project has the virtual column slot appended + Assertions.assertEquals(3, resProject.getProjects().size()); + + // Verify filter predicate replaced MATCH with slot reference + Expression resPredicate = resFilter.getConjuncts().iterator().next(); + Assertions.assertInstanceOf(Or.class, resPredicate); + Or resOr = (Or) resPredicate; + Assertions.assertInstanceOf(SlotReference.class, resOr.child(0)); + } + + /** + * Pattern 2: Filter -> Join -> Project -> Filter -> OlapScan + */ + @Test + void testPattern2FilterJoinProjectFilterScan() { + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + List<Slot> leftSlots = leftScan.getOutput(); + Slot idSlot = leftSlots.get(0); + Slot nameSlot = leftSlots.get(1); + + // Inner filter on scan + GreaterThan innerPred = new GreaterThan(idSlot, new IntegerLiteral(0)); + LogicalFilter<LogicalOlapScan> innerFilter = new LogicalFilter<>( + ImmutableSet.of(innerPred), leftScan); + + Cast castExpr = new Cast(nameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalFilter<LogicalOlapScan>> leftProject = new LogicalProject<>( + ImmutableList.of(idSlot, fnAlias), innerFilter); + + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + Slot rightIdSlot = rightScan.getOutput().get(0); + + LogicalJoin<?, ?> join = new LogicalJoin<>( + JoinType.LEFT_OUTER_JOIN, leftProject, rightScan, new JoinReorderContext()); + + Slot fnSlot = 
fnAlias.toSlot(); + MatchAny matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orPredicate = new Or(matchExpr, new Not(new IsNull(rightIdSlot))); + LogicalFilter<?> outerFilter = new LogicalFilter<>(ImmutableSet.of(orPredicate), join); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), outerFilter) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Verify structure: Filter -> Join -> Project -> Filter -> OlapScan + Assertions.assertInstanceOf(LogicalFilter.class, root); + LogicalFilter<?> resFilter = (LogicalFilter<?>) root; + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) resFilter.child(); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.left(); + Assertions.assertInstanceOf(LogicalFilter.class, resProject.child()); + LogicalFilter<?> resInnerFilter = (LogicalFilter<?>) resProject.child(); + LogicalOlapScan resScan = (LogicalOlapScan) resInnerFilter.child(); + + // Virtual column on scan, inner filter preserved + Assertions.assertEquals(1, resScan.getVirtualColumns().size()); + Assertions.assertEquals(ImmutableSet.of(innerPred), resInnerFilter.getConjuncts()); + } + + /** + * When slot has originalColumn and originalTable (metadata intact), + * the rule should NOT trigger — no pushdown needed. 
+ */ + @Test + void testMetadataIntactSkipsPushDown() { + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + List<Slot> leftSlots = leftScan.getOutput(); + Slot idSlot = leftSlots.get(0); + Slot nameSlot = leftSlots.get(1); + + // Project directly passes through nameSlot (no CAST wrapper) — metadata preserved + LogicalProject<LogicalOlapScan> leftProject = new LogicalProject<>( + ImmutableList.of(idSlot, nameSlot), leftScan); + + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + Slot rightIdSlot = rightScan.getOutput().get(0); + + LogicalJoin<?, ?> join = new LogicalJoin<>( + JoinType.LEFT_OUTER_JOIN, leftProject, rightScan, new JoinReorderContext()); + + // MATCH on nameSlot which has full metadata + MatchAny matchExpr = new MatchAny(nameSlot, new StringLiteral("hello")); + Or orPredicate = new Or(matchExpr, new Not(new IsNull(rightIdSlot))); + LogicalFilter<?> filter = new LogicalFilter<>(ImmutableSet.of(orPredicate), join); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), filter) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Rule should not trigger — no virtual columns added + LogicalFilter<?> resFilter = (LogicalFilter<?>) root; + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) resFilter.child(); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.left(); + LogicalOlapScan resScan = (LogicalOlapScan) resProject.child(); + Assertions.assertTrue(resScan.getVirtualColumns().isEmpty()); + } + + /** + * Non-DUP_KEYS/non-MOW table should not trigger the rule. 
+ */ + @Test + void testNonDupKeysTableSkipsPushDown() { + // AGG_KEYS table + LogicalOlapScan leftScan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), + PlanConstructor.newOlapTable(0, "t_agg", 0, KeysType.AGG_KEYS), + ImmutableList.of("db")); + List<Slot> leftSlots = leftScan.getOutput(); + Slot idSlot = leftSlots.get(0); + Slot nameSlot = leftSlots.get(1); + + Cast castExpr = new Cast(nameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalOlapScan> leftProject = new LogicalProject<>( + ImmutableList.of(idSlot, fnAlias), leftScan); + + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + Slot rightIdSlot = rightScan.getOutput().get(0); + + LogicalJoin<?, ?> join = new LogicalJoin<>( + JoinType.LEFT_OUTER_JOIN, leftProject, rightScan, new JoinReorderContext()); + + Slot fnSlot = fnAlias.toSlot(); + MatchAny matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orPredicate = new Or(matchExpr, new Not(new IsNull(rightIdSlot))); + LogicalFilter<?> filter = new LogicalFilter<>(ImmutableSet.of(orPredicate), join); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), filter) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Rule should not trigger for AGG_KEYS table + LogicalFilter<?> resFilter = (LogicalFilter<?>) root; + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) resFilter.child(); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.left(); + LogicalOlapScan resScan = (LogicalOlapScan) resProject.child(); + Assertions.assertTrue(resScan.getVirtualColumns().isEmpty()); + } + + /** + * Scan already has existing virtual columns — new ones should be appended, not replace them. 
+ */ + @Test + void testAppendToExistingVirtualColumns() { + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + List<Slot> leftSlots = leftScan.getOutput(); + Slot idSlot = leftSlots.get(0); + Slot nameSlot = leftSlots.get(1); + + // Pre-existing virtual column + Alias existingVc = new Alias(new GreaterThan(idSlot, new IntegerLiteral(5)), "cse_vc"); + LogicalOlapScan scanWithVc = leftScan.withVirtualColumns(ImmutableList.of(existingVc)); + + Cast castExpr = new Cast(nameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalOlapScan> leftProject = new LogicalProject<>( + ImmutableList.of(idSlot, fnAlias), scanWithVc); + + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + Slot rightIdSlot = rightScan.getOutput().get(0); + + LogicalJoin<?, ?> join = new LogicalJoin<>( + JoinType.LEFT_OUTER_JOIN, leftProject, rightScan, new JoinReorderContext()); + + Slot fnSlot = fnAlias.toSlot(); + MatchAny matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orPredicate = new Or(matchExpr, new Not(new IsNull(rightIdSlot))); + LogicalFilter<?> filter = new LogicalFilter<>(ImmutableSet.of(orPredicate), join); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), filter) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Verify both virtual columns exist + LogicalFilter<?> resFilter = (LogicalFilter<?>) root; + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) resFilter.child(); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.left(); + LogicalOlapScan resScan = (LogicalOlapScan) resProject.child(); + Assertions.assertEquals(2, resScan.getVirtualColumns().size()); + + // Existing one preserved + Alias firstVc = (Alias) resScan.getVirtualColumns().get(0); + Assertions.assertInstanceOf(GreaterThan.class, firstVc.child()); + + // New MATCH one appended + Alias secondVc = (Alias) resScan.getVirtualColumns().get(1); + 
Assertions.assertInstanceOf(MatchAny.class, secondVc.child()); + } + + /** + * Pattern 1R: Filter -> Join -> right(Project -> OlapScan) + * MATCH references a column from the right side of the join. + */ + @Test + void testPattern1RightSideFilterJoinProjectScan() { + // Left side: plain scan + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + Slot leftIdSlot = leftScan.getOutput().get(0); + + // Right side: scan with Project[id, CAST(name) as fn] + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + List<Slot> rightSlots = rightScan.getOutput(); + Slot rightIdSlot = rightSlots.get(0); + Slot rightNameSlot = rightSlots.get(1); + + Cast castExpr = new Cast(rightNameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalOlapScan> rightProject = new LogicalProject<>( + ImmutableList.of(rightIdSlot, fnAlias), rightScan); + + // Join: RIGHT_OUTER — project on right side + LogicalJoin<LogicalOlapScan, LogicalProject<LogicalOlapScan>> join = new LogicalJoin<>( + JoinType.RIGHT_OUTER_JOIN, leftScan, rightProject, new JoinReorderContext()); + + // Filter: fn MATCH_ANY 'hello' OR leftId IS NOT NULL + Slot fnSlot = fnAlias.toSlot(); + MatchAny matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orPredicate = new Or(matchExpr, new Not(new IsNull(leftIdSlot))); + LogicalFilter<?> filter = new LogicalFilter<>(ImmutableSet.of(orPredicate), join); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), filter) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Verify plan structure: Filter -> Join -> left(unchanged), right(Project -> OlapScan with VC) + Assertions.assertInstanceOf(LogicalFilter.class, root); + LogicalFilter<?> resFilter = (LogicalFilter<?>) root; + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) resFilter.child(); + + // Left side unchanged + Assertions.assertInstanceOf(LogicalOlapScan.class, resJoin.left()); + 
+ // Right side has virtual column + Assertions.assertInstanceOf(LogicalProject.class, resJoin.right()); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.right(); + Assertions.assertInstanceOf(LogicalOlapScan.class, resProject.child()); + LogicalOlapScan resScan = (LogicalOlapScan) resProject.child(); + Assertions.assertEquals(1, resScan.getVirtualColumns().size()); + Alias vcAlias = (Alias) resScan.getVirtualColumns().get(0); + Assertions.assertInstanceOf(MatchAny.class, vcAlias.child()); + Assertions.assertInstanceOf(Cast.class, ((MatchAny) vcAlias.child()).left()); + + // Filter predicate replaced MATCH with slot reference + Expression resPredicate = resFilter.getConjuncts().iterator().next(); + Assertions.assertInstanceOf(Or.class, resPredicate); + Assertions.assertInstanceOf(SlotReference.class, ((Or) resPredicate).child(0)); + } + + /** + * Pattern 2R: Filter -> Join -> right(Project -> Filter -> OlapScan) + */ + @Test + void testPattern2RightSideFilterJoinProjectFilterScan() { + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + Slot leftIdSlot = leftScan.getOutput().get(0); + + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + List<Slot> rightSlots = rightScan.getOutput(); + Slot rightIdSlot = rightSlots.get(0); + Slot rightNameSlot = rightSlots.get(1); + + // Inner filter on right scan + GreaterThan innerPred = new GreaterThan(rightIdSlot, new IntegerLiteral(0)); + LogicalFilter<LogicalOlapScan> innerFilter = new LogicalFilter<>( + ImmutableSet.of(innerPred), rightScan); + + Cast castExpr = new Cast(rightNameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalFilter<LogicalOlapScan>> rightProject = new LogicalProject<>( + ImmutableList.of(rightIdSlot, fnAlias), innerFilter); + + LogicalJoin<?, ?> join = new LogicalJoin<>( + JoinType.RIGHT_OUTER_JOIN, leftScan, rightProject, new JoinReorderContext()); + + Slot fnSlot = fnAlias.toSlot(); + MatchAny 
matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orPredicate = new Or(matchExpr, new Not(new IsNull(leftIdSlot))); + LogicalFilter<?> outerFilter = new LogicalFilter<>(ImmutableSet.of(orPredicate), join); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), outerFilter) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Verify right side: Project -> Filter -> OlapScan with VC + LogicalFilter<?> resFilter = (LogicalFilter<?>) root; + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) resFilter.child(); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.right(); + Assertions.assertInstanceOf(LogicalFilter.class, resProject.child()); + LogicalFilter<?> resInnerFilter = (LogicalFilter<?>) resProject.child(); + LogicalOlapScan resScan = (LogicalOlapScan) resInnerFilter.child(); + Assertions.assertEquals(1, resScan.getVirtualColumns().size()); + Assertions.assertEquals(ImmutableSet.of(innerPred), resInnerFilter.getConjuncts()); + } + + /** + * Pattern 3R: Join(otherPredicates) -> right(Project -> OlapScan) + */ + @Test + void testPattern3RightSideJoinOtherPredicatesProjectScan() { + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + Slot leftIdSlot = leftScan.getOutput().get(0); + + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + List<Slot> rightSlots = rightScan.getOutput(); + Slot rightIdSlot = rightSlots.get(0); + Slot rightNameSlot = rightSlots.get(1); + + Cast castExpr = new Cast(rightNameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalOlapScan> rightProject = new LogicalProject<>( + ImmutableList.of(rightIdSlot, fnAlias), rightScan); + + Slot fnSlot = fnAlias.toSlot(); + MatchAny matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orOther = new Or(matchExpr, new Not(new IsNull(leftIdSlot))); + + LogicalJoin<LogicalOlapScan, LogicalProject<LogicalOlapScan>> join = new 
LogicalJoin<>( + JoinType.RIGHT_OUTER_JOIN, + ImmutableList.of(), + ImmutableList.of(orOther), + leftScan, rightProject, new JoinReorderContext()); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), join) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Verify right side has virtual column + Assertions.assertInstanceOf(LogicalJoin.class, root); + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) root; + Assertions.assertInstanceOf(LogicalOlapScan.class, resJoin.left()); + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.right(); + LogicalOlapScan resScan = (LogicalOlapScan) resProject.child(); + Assertions.assertEquals(1, resScan.getVirtualColumns().size()); + + List<Expression> resOther = resJoin.getOtherJoinConjuncts(); + Assertions.assertEquals(1, resOther.size()); + Assertions.assertInstanceOf(SlotReference.class, ((Or) resOther.get(0)).child(0)); + } + + /** + * Pattern 3: Join(otherPredicates has MATCH) -> Project -> OlapScan + */ + @Test + void testPattern3JoinOtherPredicatesProjectScan() { + LogicalOlapScan leftScan = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + List<Slot> leftSlots = leftScan.getOutput(); + Slot idSlot = leftSlots.get(0); + Slot nameSlot = leftSlots.get(1); + + Cast castExpr = new Cast(nameSlot, StringType.INSTANCE); + Alias fnAlias = new Alias(castExpr, "fn"); + LogicalProject<LogicalOlapScan> leftProject = new LogicalProject<>( + ImmutableList.of(idSlot, fnAlias), leftScan); + + LogicalOlapScan rightScan = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + Slot rightIdSlot = rightScan.getOutput().get(0); + + // MATCH in join's otherJoinConjuncts + Slot fnSlot = fnAlias.toSlot(); + MatchAny matchExpr = new MatchAny(fnSlot, new StringLiteral("hello")); + Or orOther = new Or(matchExpr, new Not(new IsNull(rightIdSlot))); + + LogicalJoin<LogicalProject<LogicalOlapScan>, LogicalOlapScan> join = new LogicalJoin<>( + JoinType.LEFT_OUTER_JOIN, + ImmutableList.of(), + 
ImmutableList.of(orOther), + leftProject, rightScan, new JoinReorderContext()); + + Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), join) + .applyTopDown(new PushDownMatchPredicateAsVirtualColumn()) + .getPlan(); + + // Verify: Join -> Project -> OlapScan with virtual column + Assertions.assertInstanceOf(LogicalJoin.class, root); + LogicalJoin<?, ?> resJoin = (LogicalJoin<?, ?>) root; + LogicalProject<?> resProject = (LogicalProject<?>) resJoin.left(); + LogicalOlapScan resScan = (LogicalOlapScan) resProject.child(); + Assertions.assertEquals(1, resScan.getVirtualColumns().size()); + + // Verify MATCH in otherJoinConjuncts was replaced with slot + List<Expression> resOther = resJoin.getOtherJoinConjuncts(); + Assertions.assertEquals(1, resOther.size()); + Or resOr = (Or) resOther.get(0); + Assertions.assertInstanceOf(SlotReference.class, resOr.child(0)); + } +} Review Comment: **[Medium] Missing regression test:** The PR has good unit tests but no end-to-end regression test. The sibling rule `PushDownMatchProjectionAsVirtualColumn` has a thorough regression test at `regression-test/suites/inverted_index_p0/test_match_projection_virtual_column.groovy`. Per AGENTS.md: "All kernel features must have corresponding tests. Prioritize adding regression tests under `regression-test/`." A regression test (e.g., `test_match_predicate_virtual_column.groovy`) should verify: 1. MATCH in WHERE with LEFT OUTER JOIN + OR produces correct results with real data 2. MATCH on right side of RIGHT OUTER JOIN works end-to-end 3. EXPLAIN shows `__DORIS_VIRTUAL_COL__` in the scan node for the predicate patterns 4. NULL handling is correct when OUTER JOINs produce NULL rows 5. 
Query results with the inverted index are identical to the results without it (correctness baseline) ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchPredicateAsVirtualColumn.java: ########## @@ -0,0 +1,334 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Match; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Push down MATCH expressions from join/filter predicates as virtual columns on OlapScan. + * + * When MATCH appears in a predicate that cannot be pushed below a join (e.g., OR with + * join-dependent conditions like EXISTS mark or outer join null checks), this rule: + * 1. Extracts the MATCH expression from the predicate + * 2. Traces the alias slot back through the Project to find the original column expression + * 3. Creates a virtual column on the OlapScan with the MATCH on the original expression + * 4. Replaces the MATCH in the predicate with the virtual column's boolean slot + * + * Handles both left-side and right-side Project→OlapScan in joins. 
+ */ +public class PushDownMatchPredicateAsVirtualColumn implements RewriteRuleFactory { + + private boolean canPushDown(LogicalOlapScan scan) { + return PushDownMatchProjectionAsVirtualColumn.canPushDownMatch(scan); + } + + @Override + public List<Rule> buildRules() { + return ImmutableList.of( + // Pattern 1L: Filter -> Join -> left(Project -> OlapScan) + logicalFilter(logicalJoin( + logicalProject(logicalOlapScan().when(this::canPushDown)), any())) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> handleFilterSide(filter, true, false)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 1R: Filter -> Join -> right(Project -> OlapScan) + logicalFilter(logicalJoin( + any(), logicalProject(logicalOlapScan().when(this::canPushDown)))) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> handleFilterSide(filter, false, false)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 2L: Filter -> Join -> left(Project -> Filter -> OlapScan) + logicalFilter(logicalJoin( + logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))), any())) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> handleFilterSide(filter, true, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 2R: Filter -> Join -> right(Project -> Filter -> OlapScan) + logicalFilter(logicalJoin( + any(), logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))))) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> handleFilterSide(filter, false, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 3L: Join(otherPredicates) -> left(Project -> OlapScan) + logicalJoin( + logicalProject(logicalOlapScan().when(this::canPushDown)), any()) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, true, false)) + 
.toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 3R: Join(otherPredicates) -> right(Project -> OlapScan) + logicalJoin( + any(), logicalProject(logicalOlapScan().when(this::canPushDown))) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, false, false)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 4L: Join(otherPredicates) -> left(Project -> Filter -> OlapScan) + logicalJoin( + logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))), any()) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, true, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 4R: Join(otherPredicates) -> right(Project -> Filter -> OlapScan) + logicalJoin( + any(), logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown)))) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, false, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN) + ); + } + + private Plan handleFilterSide(LogicalFilter<?> filter, boolean isLeft, boolean hasInnerFilter) { + LogicalJoin<?, ?> join = (LogicalJoin<?, ?>) filter.child(); + Plan side = isLeft ? 
join.left() : join.right(); + LogicalProject<?> project = (LogicalProject<?>) side; + + LogicalOlapScan scan; + ScanRebuilder rebuilder; + if (hasInnerFilter) { + LogicalFilter<?> scanFilter = (LogicalFilter<?>) project.child(); + scan = (LogicalOlapScan) scanFilter.child(); + rebuilder = newScan -> scanFilter.withChildren(ImmutableList.of(newScan)); + } else { + scan = (LogicalOlapScan) project.child(); + rebuilder = newScan -> newScan; + } + + Set<Slot> projectOutputSlots = ImmutableSet.copyOf(project.getOutput()); + List<Expression> predicateList = new ArrayList<>(filter.getConjuncts()); + PushDownResult result = buildVirtualColumnsFromList(predicateList, project, scan, projectOutputSlots); + if (result == null) { + return null; + } + + LogicalProject<?> newProject = (LogicalProject<?>) project.withProjectsAndChild( + result.newProjections, rebuilder.rebuild(result.newScan)); + Plan newJoin = isLeft + ? join.withChildren(newProject, join.right()) + : join.withChildren(join.left(), newProject); + return filter.withConjunctsAndChild(ImmutableSet.copyOf(result.newPredicateList), newJoin); + } + + private Plan handleJoinSide(LogicalJoin<?, ?> join, boolean isLeft, boolean hasInnerFilter) { + Plan side = isLeft ? 
join.left() : join.right(); + LogicalProject<?> project = (LogicalProject<?>) side; + + LogicalOlapScan scan; + ScanRebuilder rebuilder; + if (hasInnerFilter) { + LogicalFilter<?> scanFilter = (LogicalFilter<?>) project.child(); + scan = (LogicalOlapScan) scanFilter.child(); + rebuilder = newScan -> scanFilter.withChildren(ImmutableList.of(newScan)); + } else { + scan = (LogicalOlapScan) project.child(); + rebuilder = newScan -> newScan; + } + + Set<Slot> projectOutputSlots = ImmutableSet.copyOf(project.getOutput()); + List<Expression> otherConjuncts = join.getOtherJoinConjuncts(); + PushDownResult result = buildVirtualColumnsFromList(otherConjuncts, project, scan, projectOutputSlots); + if (result == null) { + return null; + } + + LogicalProject<?> newProject = (LogicalProject<?>) project.withProjectsAndChild( + result.newProjections, rebuilder.rebuild(result.newScan)); + Plan newLeft = isLeft ? newProject : join.left(); + Plan newRight = isLeft ? join.right() : newProject; + return join.withJoinConjuncts(join.getHashJoinConjuncts(), + result.newPredicateList, join.getJoinReorderContext()) + .withChildren(newLeft, newRight); + } + + private interface ScanRebuilder { + Plan rebuild(LogicalOlapScan newScan); + } + + private boolean hasMatchInSet(Set<Expression> conjuncts) { + return conjuncts.stream().anyMatch(this::containsMatch); + } + + private boolean hasMatchInList(List<Expression> exprs) { + return exprs.stream().anyMatch(this::containsMatch); + } + + private boolean containsMatch(Expression expr) { + if (expr instanceof Match) { + return true; + } + for (Expression child : expr.children()) { + if (containsMatch(child)) { + return true; + } + } + return false; + } + + private PushDownResult buildVirtualColumnsFromList(List<Expression> predicates, + LogicalProject<?> project, LogicalOlapScan scan, Set<Slot> projectOutputSlots) { + Map<Match, Alias> matchToVirtualColumn = new HashMap<>(); + Map<Match, Slot> matchToVirtualSlot = new HashMap<>(); + + for 
(Expression predicate : predicates) { + collectMatchesNeedingPushDown(predicate, project, projectOutputSlots, + matchToVirtualColumn, matchToVirtualSlot); + } + + if (matchToVirtualColumn.isEmpty()) { + return null; + } + + LogicalOlapScan newScan = scan.appendVirtualColumns( + new ArrayList<>(matchToVirtualColumn.values())); + + List<NamedExpression> newProjections = new ArrayList<>(project.getProjects()); + for (Alias vcAlias : matchToVirtualColumn.values()) { + newProjections.add(vcAlias.toSlot()); + } + + List<Expression> newPredicateList = new ArrayList<>(); + for (Expression predicate : predicates) { + newPredicateList.add(replaceMatch(predicate, matchToVirtualSlot)); + } + + return new PushDownResult(newScan, newProjections, newPredicateList); + } + + private void collectMatchesNeedingPushDown(Expression expr, + LogicalProject<?> project, Set<Slot> projectOutputSlots, + Map<Match, Alias> matchToVirtualColumn, Map<Match, Slot> matchToVirtualSlot) { + if (expr instanceof Match) { + Match match = (Match) expr; + Set<Slot> inputSlots = match.left().getInputSlots(); + List<SlotReference> matchSlots = inputSlots.stream() + .filter(SlotReference.class::isInstance) + .map(SlotReference.class::cast) + .collect(Collectors.toList()); Review Comment: **[Nit] Clarity suggestion:** The `matchSlots` list handling implies multi-slot left sides are possible, but MATCH in Doris is constrained to single-slot left sides (enforced by `CheckMatchExpression`). The "use the first slot" comment at line 265 is slightly misleading. Consider either: 1. Adding a `Preconditions.checkState(matchSlots.size() == 1)` assertion for defensive clarity, or 2. Adding a brief comment noting that MATCH left side is always a single slot per `CheckMatchExpression` validation. This would make the intent clearer for future readers. 
########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchPredicateAsVirtualColumn.java: ########## @@ -0,0 +1,334 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Match; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import 
java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Push down MATCH expressions from join/filter predicates as virtual columns on OlapScan. + * + * When MATCH appears in a predicate that cannot be pushed below a join (e.g., OR with + * join-dependent conditions like EXISTS mark or outer join null checks), this rule: + * 1. Extracts the MATCH expression from the predicate + * 2. Traces the alias slot back through the Project to find the original column expression + * 3. Creates a virtual column on the OlapScan with the MATCH on the original expression + * 4. Replaces the MATCH in the predicate with the virtual column's boolean slot + * + * Handles both left-side and right-side Project→OlapScan in joins. + */ +public class PushDownMatchPredicateAsVirtualColumn implements RewriteRuleFactory { + + private boolean canPushDown(LogicalOlapScan scan) { + return PushDownMatchProjectionAsVirtualColumn.canPushDownMatch(scan); + } + + @Override + public List<Rule> buildRules() { + return ImmutableList.of( + // Pattern 1L: Filter -> Join -> left(Project -> OlapScan) + logicalFilter(logicalJoin( + logicalProject(logicalOlapScan().when(this::canPushDown)), any())) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> handleFilterSide(filter, true, false)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 1R: Filter -> Join -> right(Project -> OlapScan) + logicalFilter(logicalJoin( + any(), logicalProject(logicalOlapScan().when(this::canPushDown)))) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> handleFilterSide(filter, false, false)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 2L: Filter -> Join -> left(Project -> Filter -> OlapScan) + logicalFilter(logicalJoin( + logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))), any())) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> 
handleFilterSide(filter, true, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 2R: Filter -> Join -> right(Project -> Filter -> OlapScan) + logicalFilter(logicalJoin( + any(), logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))))) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(filter -> handleFilterSide(filter, false, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 3L: Join(otherPredicates) -> left(Project -> OlapScan) + logicalJoin( + logicalProject(logicalOlapScan().when(this::canPushDown)), any()) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, true, false)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 3R: Join(otherPredicates) -> right(Project -> OlapScan) + logicalJoin( + any(), logicalProject(logicalOlapScan().when(this::canPushDown))) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, false, false)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 4L: Join(otherPredicates) -> left(Project -> Filter -> OlapScan) + logicalJoin( + logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))), any()) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, true, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 4R: Join(otherPredicates) -> right(Project -> Filter -> OlapScan) + logicalJoin( + any(), logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown)))) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(join -> handleJoinSide(join, false, true)) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN) + ); + } + + private Plan handleFilterSide(LogicalFilter<?> filter, boolean isLeft, boolean hasInnerFilter) { + LogicalJoin<?, ?> join = (LogicalJoin<?, ?>) 
filter.child(); + Plan side = isLeft ? join.left() : join.right(); + LogicalProject<?> project = (LogicalProject<?>) side; + + LogicalOlapScan scan; + ScanRebuilder rebuilder; + if (hasInnerFilter) { + LogicalFilter<?> scanFilter = (LogicalFilter<?>) project.child(); + scan = (LogicalOlapScan) scanFilter.child(); + rebuilder = newScan -> scanFilter.withChildren(ImmutableList.of(newScan)); + } else { + scan = (LogicalOlapScan) project.child(); + rebuilder = newScan -> newScan; + } + + Set<Slot> projectOutputSlots = ImmutableSet.copyOf(project.getOutput()); + List<Expression> predicateList = new ArrayList<>(filter.getConjuncts()); + PushDownResult result = buildVirtualColumnsFromList(predicateList, project, scan, projectOutputSlots); + if (result == null) { + return null; + } + + LogicalProject<?> newProject = (LogicalProject<?>) project.withProjectsAndChild( + result.newProjections, rebuilder.rebuild(result.newScan)); + Plan newJoin = isLeft + ? join.withChildren(newProject, join.right()) + : join.withChildren(join.left(), newProject); + return filter.withConjunctsAndChild(ImmutableSet.copyOf(result.newPredicateList), newJoin); + } + + private Plan handleJoinSide(LogicalJoin<?, ?> join, boolean isLeft, boolean hasInnerFilter) { + Plan side = isLeft ? join.left() : join.right(); + LogicalProject<?> project = (LogicalProject<?>) side; + + LogicalOlapScan scan; + ScanRebuilder rebuilder; + if (hasInnerFilter) { + LogicalFilter<?> scanFilter = (LogicalFilter<?>) project.child(); + scan = (LogicalOlapScan) scanFilter.child(); + rebuilder = newScan -> scanFilter.withChildren(ImmutableList.of(newScan)); Review Comment: **[Low] Minor efficiency:** `withJoinConjuncts(...)` creates an intermediate join with old children, then `.withChildren(...)` creates another join replacing the children. This allocates two join objects where one would suffice; the first is discarded immediately.
You could use `withConjunctsChildren` to do both in a single step: ```java return join.withConjunctsChildren( join.getHashJoinConjuncts(), result.newPredicateList, newLeft, newRight, join.getJoinReorderContext()); ``` Not a correctness issue — just a minor efficiency improvement, mirroring how `handleFilterSide` rebuilds its join with a single `withChildren` call. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
