comphead commented on code in PR #16123: URL: https://github.com/apache/datafusion/pull/16123#discussion_r2098950624
########## datafusion/core/tests/parquet/filter_pushdown.rs: ########## @@ -32,50 +32,41 @@ use arrow::compute::concat_batches; use arrow::record_batch::RecordBatch; use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::prelude::{col, lit, lit_timestamp_nano, Expr, SessionContext}; +use datafusion::prelude::{ + col, lit, lit_timestamp_nano, Expr, ParquetReadOptions, SessionContext, +}; use datafusion::test_util::parquet::{ParquetScanOptions, TestParquetFile}; -use datafusion_common::instant::Instant; use datafusion_expr::utils::{conjunction, disjunction, split_conjunction}; use itertools::Itertools; use parquet::file::properties::WriterProperties; use tempfile::TempDir; -use test_utils::AccessLogGenerator; /// how many rows of generated data to write to our parquet file (arbitrary) const NUM_ROWS: usize = 4096; -fn generate_file(tempdir: &TempDir, props: WriterProperties) -> TestParquetFile { - // Tune down the generator for smaller files - let generator = AccessLogGenerator::new() - .with_row_limit(NUM_ROWS) - .with_pods_per_host(1..4) - .with_containers_per_pod(1..2) - .with_entries_per_container(128..256); - - let file = tempdir.path().join("data.parquet"); - - let start = Instant::now(); - println!("Writing test data to {file:?}"); - let test_parquet_file = TestParquetFile::try_new(file, props, generator).unwrap(); - println!( - "Completed generating test data in {:?}", - Instant::now() - start - ); - test_parquet_file -} - #[tokio::test] async fn single_file() { // Only create the parquet file once as it is fairly large - let tempdir = TempDir::new_in(Path::new(".")).unwrap(); // Set row group size smaller so can test with fewer rows let props = WriterProperties::builder() .set_max_row_group_size(1024) .build(); - let test_parquet_file = generate_file(&tempdir, props); - + let ctx: SessionContext = SessionContext::new(); Review Comment: This is the actual change: use predefined parquet files instead of randomly generated ones.
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org