alamb commented on code in PR #14119: URL: https://github.com/apache/datafusion/pull/14119#discussion_r1926002173
########## datafusion/sqllogictest/test_files/parquet.slt: ########## @@ -598,3 +598,34 @@ drop table cpu; statement ok drop table cpu_parquet; + +# Test for parquet predicate pruning with `starts_with` function +query I +copy (values ('foo'), ('bar'), ('baz')) TO 'test_files/scratch/parquet/foo.parquet' +---- +3 + +statement ok +create external table foo +stored as parquet +location 'test_files/scratch/parquet/foo.parquet'; + + +# Expect that the pruning predicate contains a comparison on the min/max value of `column1`: +# `column1_min@0 <= g AND f <= column1_max@1` +# (the `starts_with` function is not supported in the parquet predicate pruning, but DataFusion rewrites +# it to a `LIKE`, which is then handled by the PruningPredicate) +query TT +explain select * from foo where starts_with(column1, 'f'); +---- +logical_plan +01)Filter: foo.column1 LIKE Utf8View("f%") +02)--TableScan: foo projection=[column1], partial_filters=[foo.column1 LIKE Utf8View("f%")] +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: column1@0 LIKE f% +03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/foo.parquet]]}, projection=[column1], predicate=column1@0 LIKE f%, pruning_predicate=column1_null_count@2 != column1_row_count@3 AND column1_min@0 <= g AND f <= column1_max@1, required_guarantees=[] Review Comment: this is so cool! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org