This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

The following commit(s) were added to refs/heads/master by this push:
     new 22898abbc IMPALA-14275: Ignore produced runtime filters for tuple cache keys
22898abbc is described below

commit 22898abbc44864775eff73c7ccedd893704baa27
Author: Joe McDonnell <joemcdonn...@cloudera.com>
AuthorDate: Wed Jul 30 11:05:21 2025 -0700

    IMPALA-14275: Ignore produced runtime filters for tuple cache keys

    PlanNode's list of runtime filters includes both runtime filters
    consumed and produced. The code for incorporating runtime filters
    into the tuple cache key doesn't make a distinction between the
    two. This means that JoinNodes that produce runtime filters hash
    their children more than once. This only applies to mt_dop=0,
    because mt_dop>0 produces the runtime filter from a separate build
    side fragment. This hasn't produced a noticeable issue, but it is
    still wrong. This ignores produced runtime filters.

    Testing:
     - Added a test case in TupleCacheTest

    Change-Id: I5d132a5cf7de1ce19b55545171799d8f38bb8c3d
    Reviewed-on: http://gerrit.cloudera.org:8080/23227
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Michael Smith <michael.sm...@cloudera.com>
---
 fe/src/main/java/org/apache/impala/planner/PlanNode.java | 10 +++++++---
 .../java/org/apache/impala/planner/TupleCacheTest.java   | 15 +++++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index 46de0316e..27116e7a6 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -1368,10 +1368,14 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
       return;
     }
 
-    // Include the build-side of a RuntimeFilter; look past the 1st ExchangeNode.
-    // If the build-side is hashable, merge the hash. Otherwise mark this node as
-    // ineligible because the RuntimeFilter is too complex to reason about.
+    // For runtime filters consumed by this node, include the build-side of the
+    // RuntimeFilter (look past the 1st ExchangeNode). If the build-side is hashable,
+    // merge the hash. Otherwise mark this node as ineligible because the RuntimeFilter
+    // is too complex to reason about.
     for (RuntimeFilter filter : runtimeFilters_) {
+      // We should only include runtime filters consumed by this node. If this node is
+      // the source of the runtime filter, skip it.
+      if (filter.getSrc() == this) continue;
       // We want the build side of the join.
       PlanNode build = filter.getSrc().getBuildNode();
       Preconditions.checkState(!build.contains(this),
diff --git a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
index 0c211a42a..eaba873a6 100644
--- a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
@@ -118,6 +118,21 @@ public class TupleCacheTest extends PlannerTestBase {
           String.format(basicJoinTmpl, "probe.id = build.id"),
           String.format(basicJoinTmpl, "probe.id = build.id and build.id < 100"),
           isDistributedPlan);
+
+      // JoinNodes produce runtime filters and don't consume them. Verify that produced
+      // runtime filters don't get incorporated into the hash.
+      List<PlanNode> cacheEligibleNodes =
+          getCacheEligibleNodes(String.format(basicJoinTmpl, "probe.id = build.id"));
+      for (PlanNode node : cacheEligibleNodes) {
+        if (node instanceof JoinNode) {
+          // The join node should not have any hash trace elements due to runtime filters
+          List<HashTraceElement> hashTraces = node.getTupleCacheInfo().getHashTraces();
+          for (HashTraceElement hashTrace : hashTraces) {
+            assertTrue(hashTrace.getComment().indexOf("runtime filter") == -1);
+            assertTrue(hashTrace.getComment().indexOf("RF0") == -1);
+          }
+        }
+      }
     }
   }
 