alamb commented on code in PR #12503:
URL: https://github.com/apache/datafusion/pull/12503#discussion_r1764905210


##########
datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs:
##########
@@ -264,8 +264,12 @@ impl PruningStatistics for BloomFilterStatistics {
             .iter()
             .map(|value| {
                 match value {
-                    ScalarValue::Utf8(Some(v)) => sbbf.check(&v.as_str()),
-                    ScalarValue::Binary(Some(v)) => sbbf.check(v),
+                    ScalarValue::Utf8(Some(v)) | 
ScalarValue::Utf8View(Some(v)) => {
+                        sbbf.check(&v.as_str())
+                    }
+                    ScalarValue::Binary(Some(v)) | 
ScalarValue::BinaryView(Some(v)) => {
+                        sbbf.check(v)
+                    }

Review Comment:
   I double checked that the tests cover this code like this:
   
   ```diff
   --- 
a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
   +++ 
b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
   @@ -264,9 +264,12 @@ impl PruningStatistics for BloomFilterStatistics {
                .iter()
                .map(|value| {
                    match value {
   -                    ScalarValue::Utf8(Some(v)) | 
ScalarValue::Utf8View(Some(v)) => {
   +                    ScalarValue::Utf8(Some(v))  => {
                            sbbf.check(&v.as_str())
                        }
   +                    ScalarValue::Utf8View(Some(v)) => {
   +                        panic!("String view bloom filter not implemented 
yet");
   +                    }
                        ScalarValue::Binary(Some(v)) | 
ScalarValue::BinaryView(Some(v)) => {
                            sbbf.check(v)
                        }
   @@ -1439,6 +1442,7 @@ mod tests {
                }
            }
   
   +
   ```
   
   
   ```
   cargo test -p datafusion -- row_group_filter
   
   running 21 tests
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_missing_stats
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_simple_expr
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_sql_in
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_file_schema
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_simple_expr
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type2
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type3
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_null_expr
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_partial_expr
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type4
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type5
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_eq_null_expr
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_2_values
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_value
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_or_not_eq
 ... ok
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view
 ... FAILED
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view
 ... FAILED
   test 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_without_bloom_filter
 ... ok
   
   failures:
   
   ---- 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view
 stdout ----
   thread 
'datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view'
 panicked at 
datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs:271:25:
   String bloom filter not implemented yet
   
   ---- 
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view
 stdout ----
   thread 
'datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view'
 panicked at 
datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs:271:25:
   String bloom filter not implemented yet
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   
   
   failures:
       
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view
       
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to