rluvaton commented on code in PR #21184:
URL: https://github.com/apache/datafusion/pull/21184#discussion_r2997535751
##########
datafusion/physical-plan/src/joins/sort_merge_join/tests.rs:
##########
@@ -2721,354 +2721,6 @@ async fn test_left_outer_join_filtered_mask() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn test_semi_join_filtered_mask() -> Result<()> {
- for join_type in [LeftSemi, RightSemi] {
- let mut joined_batches = build_joined_record_batches()?;
- let schema = joined_batches.joined_batches.schema();
-
- let output = joined_batches.concat_batches(&schema)?;
- let out_mask = joined_batches.filter_metadata.filter_mask.finish();
- let out_indices = joined_batches.filter_metadata.row_indices.finish();
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0]),
- &[0usize],
- &BooleanArray::from(vec![true]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![true])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0]),
- &[0usize],
- &BooleanArray::from(vec![false]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0]),
- &[0usize; 2],
- &BooleanArray::from(vec![true, true]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![Some(true), None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![true, true, true]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![Some(true), None, None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![true, false, true]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![Some(true), None, None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![false, false, true]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![None, None, Some(true),])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![false, true, true]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![None, Some(true), None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![false, false, false]),
- output.num_rows()
- )
- .unwrap(),
- BooleanArray::from(vec![None, None, None])
- );
-
- let corrected_mask = get_corrected_filter_mask(
- join_type,
- &out_indices,
- &joined_batches.filter_metadata.batch_ids,
- &out_mask,
- output.num_rows(),
- )
- .unwrap();
-
- assert_eq!(
- corrected_mask,
- BooleanArray::from(vec![
- Some(true),
- None,
- Some(true),
- None,
- Some(true),
- None,
- None,
- None
- ])
- );
-
- let filtered_rb = filter_record_batch(&output, &corrected_mask)?;
-
- assert_batches_eq!(
- &[
- "+---+----+---+----+",
- "| a | b | x | y |",
- "+---+----+---+----+",
- "| 1 | 10 | 1 | 11 |",
- "| 1 | 11 | 1 | 12 |",
- "| 1 | 12 | 1 | 13 |",
- "+---+----+---+----+",
- ],
- &[filtered_rb]
- );
-
- // output null rows
- let null_mask = arrow::compute::not(&corrected_mask)?;
- assert_eq!(
- null_mask,
- BooleanArray::from(vec![
- Some(false),
- None,
- Some(false),
- None,
- Some(false),
- None,
- None,
- None
- ])
- );
-
- let null_joined_batch = filter_record_batch(&output, &null_mask)?;
-
- assert_batches_eq!(
- &[
- "+---+---+---+---+",
- "| a | b | x | y |",
- "+---+---+---+---+",
- "+---+---+---+---+",
- ],
- &[null_joined_batch]
- );
- }
- Ok(())
-}
-
-#[tokio::test]
-async fn test_anti_join_filtered_mask() -> Result<()> {
- for join_type in [LeftAnti, RightAnti] {
- let mut joined_batches = build_joined_record_batches()?;
- let schema = joined_batches.joined_batches.schema();
-
- let output = joined_batches.concat_batches(&schema)?;
- let out_mask = joined_batches.filter_metadata.filter_mask.finish();
- let out_indices = joined_batches.filter_metadata.row_indices.finish();
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0]),
- &[0usize],
- &BooleanArray::from(vec![true]),
- 1
- )
- .unwrap(),
- BooleanArray::from(vec![None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0]),
- &[0usize],
- &BooleanArray::from(vec![false]),
- 1
- )
- .unwrap(),
- BooleanArray::from(vec![Some(true)])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0]),
- &[0usize; 2],
- &BooleanArray::from(vec![true, true]),
- 2
- )
- .unwrap(),
- BooleanArray::from(vec![None, None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![true, true, true]),
- 3
- )
- .unwrap(),
- BooleanArray::from(vec![None, None, None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![true, false, true]),
- 3
- )
- .unwrap(),
- BooleanArray::from(vec![None, None, None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![false, false, true]),
- 3
- )
- .unwrap(),
- BooleanArray::from(vec![None, None, None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![false, true, true]),
- 3
- )
- .unwrap(),
- BooleanArray::from(vec![None, None, None])
- );
-
- assert_eq!(
- get_corrected_filter_mask(
- join_type,
- &UInt64Array::from(vec![0, 0, 0]),
- &[0usize; 3],
- &BooleanArray::from(vec![false, false, false]),
- 3
- )
- .unwrap(),
- BooleanArray::from(vec![None, None, Some(true)])
- );
-
- let corrected_mask = get_corrected_filter_mask(
- join_type,
- &out_indices,
- &joined_batches.filter_metadata.batch_ids,
- &out_mask,
- output.num_rows(),
- )
- .unwrap();
-
- assert_eq!(
- corrected_mask,
- BooleanArray::from(vec![
- None,
- None,
- None,
- None,
- None,
- Some(true),
- None,
- Some(true)
- ])
- );
-
- let filtered_rb = filter_record_batch(&output, &corrected_mask)?;
-
- allow_duplicates! {
- assert_snapshot!(batches_to_string(&[filtered_rb]), @r"
- +---+----+---+----+
- | a | b | x | y |
- +---+----+---+----+
- | 1 | 13 | 1 | 12 |
- | 1 | 14 | 1 | 11 |
- +---+----+---+----+
- ");
- }
-
- // output null rows
- let null_mask = arrow::compute::not(&corrected_mask)?;
- assert_eq!(
- null_mask,
- BooleanArray::from(vec![
- None,
- None,
- None,
- None,
- None,
- Some(false),
- None,
- Some(false),
- ])
- );
-
- let null_joined_batch = filter_record_batch(&output, &null_mask)?;
-
- allow_duplicates! {
- assert_snapshot!(batches_to_string(&[null_joined_batch]), @r"
- +---+---+---+---+
- | a | b | x | y |
- +---+---+---+---+
- +---+---+---+---+
- ");
- }
- }
-
- Ok(())
-}
Review Comment:
Why were `test_semi_join_filtered_mask` and `test_anti_join_filtered_mask` removed rather than moved?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]