This is an automated email from the ASF dual-hosted git repository.
houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new cf637e7 collect table stats by default for listing table (#1347)
cf637e7 is described below
commit cf637e7611a88cf89de1ba7fb93d21c687d30be6
Author: QP Hou <[email protected]>
AuthorDate: Mon Nov 22 16:10:27 2021 -0800
collect table stats by default for listing table (#1347)
* collect table stats by default for listing table
* add test
---
datafusion/src/datasource/listing/table.rs | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/datafusion/src/datasource/listing/table.rs b/datafusion/src/datasource/listing/table.rs
index aadc340..22e3f75 100644
--- a/datafusion/src/datasource/listing/table.rs
+++ b/datafusion/src/datasource/listing/table.rs
@@ -76,7 +76,7 @@ impl ListingOptions {
file_extension: String::new(),
format,
table_partition_cols: vec![],
- collect_stat: false,
+ collect_stat: true,
target_partitions: 1,
}
}
@@ -304,6 +304,23 @@ mod tests {
}
#[tokio::test]
+ async fn load_table_stats_by_default() -> Result<()> {
+ let testdata = crate::test_util::parquet_test_data();
+ let filename = format!("{}/{}", testdata, "alltypes_plain.parquet");
+ let opt = ListingOptions::new(Arc::new(ParquetFormat::default()));
+ let schema = opt
+ .infer_schema(Arc::new(LocalFileSystem {}), &filename)
+ .await?;
+ let table =
+ ListingTable::new(Arc::new(LocalFileSystem {}), filename, schema, opt);
+ let exec = table.scan(&None, 1024, &[], None).await?;
+ assert_eq!(exec.statistics().num_rows, Some(8));
+ assert_eq!(exec.statistics().total_byte_size, Some(671));
+
+ Ok(())
+ }
+
+ #[tokio::test]
async fn read_empty_table() -> Result<()> {
let store = TestObjectStore::new_arc(&[("table/p1=v1/file.avro", 100)]);