korowa commented on code in PR #8435:
URL: https://github.com/apache/arrow-datafusion/pull/8435#discussion_r1436901619
##########
datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs:
##########
@@ -1075,82 +1075,28 @@ mod tests {
create_physical_expr(expr, &df_schema, schema, &execution_props).unwrap()
}
- // Note the values in the `String` column are:
- // ❯ select * from './parquet-testing/data/data_index_bloom_encoding_stats.parquet';
- // +-----------+
- // | String |
- // +-----------+
- // | Hello |
- // | This is |
- // | a |
- // | test |
- // | How |
- // | are you |
- // | doing |
- // | today |
- // | the quick |
- // | brown fox |
- // | jumps |
- // | over |
- // | the lazy |
- // | dog |
- // +-----------+
#[tokio::test]
async fn test_row_group_bloom_filter_pruning_predicate_simple_expr() {
- // load parquet file
- let testdata = datafusion_common::test_util::parquet_test_data();
- let file_name = "data_index_bloom_encoding_stats.parquet";
- let path = format!("{testdata}/{file_name}");
- let data = bytes::Bytes::from(std::fs::read(path).unwrap());
-
- // generate pruning predicate `(String = "Hello_Not_exists")`
- let schema = Schema::new(vec![Field::new("String", DataType::Utf8, false)]);
- let expr = col(r#""String""#).eq(lit("Hello_Not_Exists"));
- let expr = logical2physical(&expr, &schema);
- let pruning_predicate =
- PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
-
- let row_groups = vec![0];
- let pruned_row_groups = test_row_group_bloom_filter_pruning_predicate(
- file_name,
- data,
- &pruning_predicate,
- &row_groups,
- )
- .await
- .unwrap();
- assert!(pruned_row_groups.is_empty());
+ BloomFilterTest::new_data_index_bloom_encoding_stats()
+ .with_expect_all_pruned()
+ // generate pruning predicate `(String = "Hello_Not_exists")`
+ .run(col(r#""String""#).eq(lit("Hello_Not_Exists")))
+ .await
}
#[tokio::test]
async fn test_row_group_bloom_filter_pruning_predicate_mutiple_expr() {
- // load parquet file
- let testdata = datafusion_common::test_util::parquet_test_data();
- let file_name = "data_index_bloom_encoding_stats.parquet";
- let path = format!("{testdata}/{file_name}");
- let data = bytes::Bytes::from(std::fs::read(path).unwrap());
-
- // generate pruning predicate `(String = "Hello_Not_exists" OR String = "Hello_Not_exists2")`
- let schema = Schema::new(vec![Field::new("String", DataType::Utf8, false)]);
- let expr = lit("1").eq(lit("1")).and(
- col(r#""String""#)
- .eq(lit("Hello_Not_Exists"))
- .or(col(r#""String""#).eq(lit("Hello_Not_Exists2"))),
- );
- let expr = logical2physical(&expr, &schema);
- let pruning_predicate =
- PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
-
- let row_groups = vec![0];
- let pruned_row_groups = test_row_group_bloom_filter_pruning_predicate(
- file_name,
- data,
- &pruning_predicate,
- &row_groups,
- )
- .await
- .unwrap();
- assert!(pruned_row_groups.is_empty());
+ BloomFilterTest::new_data_index_bloom_encoding_stats()
+ .with_expect_all_pruned()
+ // generate pruning predicate `(String = "Hello_Not_exists" OR String = "Hello_Not_exists2")`
Review Comment:
This comment is a bit misleading: the actual test case expression also contains a `"1" = "1"` part, which the comment does not mention.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]