isidentical commented on code in PR #3787:
URL: https://github.com/apache/arrow-datafusion/pull/3787#discussion_r993808211
##########
datafusion/core/src/physical_optimizer/hash_build_probe_order.rs:
##########
@@ -274,6 +337,76 @@ mod tests {
);
}
+ /// Compare the input plan with the plan after running the probe order optimizer.
+ macro_rules! assert_optimized {
+ ($EXPECTED_LINES: expr, $PLAN: expr) => {
+ let expected_lines =
+ $EXPECTED_LINES.iter().map(|s| *s).collect::<Vec<&str>>();
+
+ let optimized = HashBuildProbeOrder::new()
+ .optimize(Arc::new($PLAN), &SessionConfig::new())
+ .unwrap();
+
+ let plan = displayable(optimized.as_ref()).indent().to_string();
+ let actual_lines = plan.split("\n").collect::<Vec<&str>>();
+
+ assert_eq!(
+ &expected_lines, &actual_lines,
+ "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
+ expected_lines, actual_lines
+ );
+ };
+ }
+
+ #[tokio::test]
+ async fn test_nested_join_swap() {
+ let (big, medium, small) = create_nested_with_min_max();
+
+ let child_join = HashJoinExec::try_new(
+ Arc::clone(&big),
+ Arc::clone(&small),
+ vec![(
+ Column::new_with_schema("big_col", &big.schema()).unwrap(),
+ Column::new_with_schema("small_col", &small.schema()).unwrap(),
+ )],
+ None,
+ &JoinType::Inner,
+ PartitionMode::CollectLeft,
+ &false,
+ )
+ .unwrap();
+ let child_schema = child_join.schema();
+
+ let join = HashJoinExec::try_new(
+ Arc::clone(&medium),
+ Arc::new(child_join),
+ vec![(
+ Column::new_with_schema("medium_col",
&medium.schema()).unwrap(),
+ Column::new_with_schema("small_col", &child_schema).unwrap(),
+ )],
+ None,
+ &JoinType::Left,
+ PartitionMode::CollectLeft,
+ &false,
+ )
+ .unwrap();
+
+ // The first hash join's left is 'small' table (with 1000 rows), and the second hash join's
+ // left is the F(small IJ big) which has an estimated cardinality of 2000 rows (vs medium which
+ // has an exact cardinality of 10_000 rows).
+ let expected = [
+ "ProjectionExec: expr=[medium_col@2 as medium_col, big_col@0 as
big_col, small_col@1 as small_col]",
+ " HashJoinExec: mode=CollectLeft, join_type=Right, on=[(Column {
name: \"small_col\", index: 1 }, Column { name: \"medium_col\", index: 0 })]",
+ " ProjectionExec: expr=[big_col@1 as big_col, small_col@0 as
small_col]",
+ " HashJoinExec: mode=CollectLeft, join_type=Inner,
on=[(Column { name: \"small_col\", index: 0 }, Column { name: \"big_col\",
index: 0 })]",
Review Comment:
That's a very good point; I'll add a couple of lines to document this here as
well.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]