This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 33b9357 fix test (#1831)
33b9357 is described below
commit 33b9357139ad918da0f45a92db37f00ffa64b0ba
Author: xudong.w <[email protected]>
AuthorDate: Sat Mar 5 03:53:43 2022 +0800
fix test (#1831)
---
.../physical_optimizer/hash_build_probe_order.rs | 34 ++++++++++++++++------
datafusion/src/physical_plan/mod.rs | 2 +-
2 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/datafusion/src/physical_optimizer/hash_build_probe_order.rs b/datafusion/src/physical_optimizer/hash_build_probe_order.rs
index 1184751..244eb6a 100644
--- a/datafusion/src/physical_optimizer/hash_build_probe_order.rs
+++ b/datafusion/src/physical_optimizer/hash_build_probe_order.rs
@@ -49,10 +49,18 @@ impl HashBuildProbeOrder {
}
fn should_swap_join_order(left: &dyn ExecutionPlan, right: &dyn ExecutionPlan) -> bool {
- let left_rows = left.statistics().num_rows;
- let right_rows = right.statistics().num_rows;
+ // Get the left and right table's total bytes
+ // If both the left and right tables contain total_byte_size statistics,
+ // use `total_byte_size` to determine `should_swap_join_order`, else use `num_rows`
+ let (left_size, right_size) = match (
+ left.statistics().total_byte_size,
+ right.statistics().total_byte_size,
+ ) {
+ (Some(l), Some(r)) => (Some(l), Some(r)),
+ _ => (left.statistics().num_rows, right.statistics().num_rows),
+ };
- match (left_rows, right_rows) {
+ match (left_size, right_size) {
(Some(l), Some(r)) => l > r,
_ => false,
}
@@ -168,7 +176,8 @@ mod tests {
fn create_big_and_small() -> (Arc<dyn ExecutionPlan>, Arc<dyn ExecutionPlan>) {
let big = Arc::new(StatisticsExec::new(
Statistics {
- num_rows: Some(100000),
+ num_rows: Some(10),
+ total_byte_size: Some(100000),
..Default::default()
},
Schema::new(vec![Field::new("big_col", DataType::Int32, false)]),
@@ -176,7 +185,8 @@ mod tests {
let small = Arc::new(StatisticsExec::new(
Statistics {
- num_rows: Some(10),
+ num_rows: Some(100000),
+ total_byte_size: Some(10),
..Default::default()
},
Schema::new(vec![Field::new("small_col", DataType::Int32, false)]),
@@ -224,8 +234,11 @@ mod tests {
.downcast_ref::<HashJoinExec>()
.expect("The type of the plan should not be changed");
- assert_eq!(swapped_join.left().statistics().num_rows, Some(10));
- assert_eq!(swapped_join.right().statistics().num_rows, Some(100000));
+ assert_eq!(swapped_join.left().statistics().total_byte_size, Some(10));
+ assert_eq!(
+ swapped_join.right().statistics().total_byte_size,
+ Some(100000)
+ );
}
#[tokio::test]
@@ -254,8 +267,11 @@ mod tests {
.downcast_ref::<HashJoinExec>()
.expect("The type of the plan should not be changed");
- assert_eq!(swapped_join.left().statistics().num_rows, Some(10));
- assert_eq!(swapped_join.right().statistics().num_rows, Some(100000));
+ assert_eq!(swapped_join.left().statistics().total_byte_size, Some(10));
+ assert_eq!(
+ swapped_join.right().statistics().total_byte_size,
+ Some(100000)
+ );
}
#[tokio::test]
diff --git a/datafusion/src/physical_plan/mod.rs b/datafusion/src/physical_plan/mod.rs
index f2bf23b..e2ce99f 100644
--- a/datafusion/src/physical_plan/mod.rs
+++ b/datafusion/src/physical_plan/mod.rs
@@ -95,7 +95,7 @@ pub use self::planner::PhysicalPlanner;
pub struct Statistics {
/// The number of table rows
pub num_rows: Option<usize>,
- /// total byte of the table rows
+ /// total bytes of the table rows
pub total_byte_size: Option<usize>,
/// Statistics on a column level
pub column_statistics: Option<Vec<ColumnStatistics>>,