mkleen commented on code in PR #20047:
URL: https://github.com/apache/datafusion/pull/20047#discussion_r3208656616
##########
datafusion-cli/src/main.rs:
##########
@@ -687,68 +682,17 @@ mod tests {
.await?;
}
- // When the cache manager creates a StatisticsCache by default,
- // the contents will show up here
- let sql = "SELECT split_part(path, '/', -1) as filename,
file_size_bytes, num_rows, num_columns, table_size_bytes from
statistics_cache() order by filename";
+ let sql = "SELECT split_part(path, '/', -1) as filename, table,
file_size_bytes, num_rows, num_columns, table_size_bytes from
statistics_cache() order by filename";
let df = ctx.sql(sql).await?;
let rbs = df.collect().await?;
assert_snapshot!(batches_to_string(&rbs),@r"
- ++
- ++
- ");
-
- Ok(())
- }
-
- // Can be removed when https://github.com/apache/datafusion/issues/19217
is resolved
- #[tokio::test]
- async fn test_statistics_cache_override() -> Result<(), DataFusionError> {
- // Install a specific StatisticsCache implementation
- let file_statistics_cache =
Arc::new(DefaultFileStatisticsCache::default());
- let cache_config = CacheManagerConfig::default()
- .with_files_statistics_cache(Some(file_statistics_cache.clone()));
- let runtime = RuntimeEnvBuilder::new()
- .with_cache_manager(cache_config)
- .build()?;
- let config = SessionConfig::new().with_collect_statistics(true);
- let ctx = SessionContext::new_with_config_rt(config,
Arc::new(runtime));
-
- ctx.register_udtf(
- "statistics_cache",
- Arc::new(StatisticsCacheFunc::new(
- ctx.task_ctx().runtime_env().cache_manager.clone(),
- )),
- );
-
- for filename in [
- "alltypes_plain",
- "alltypes_tiny_pages",
- "lz4_raw_compressed_larger",
- ] {
- ctx.sql(
- format!(
- "create external table {filename}
- stored as parquet
- location '../parquet-testing/data/{filename}.parquet'",
- )
- .as_str(),
- )
- .await?
- .collect()
- .await?;
- }
-
- let sql = "SELECT split_part(path, '/', -1) as filename,
file_size_bytes, num_rows, num_columns, table_size_bytes from
statistics_cache() order by filename";
- let df = ctx.sql(sql).await?;
- let rbs = df.collect().await?;
- assert_snapshot!(batches_to_string(&rbs),@r"
-
+-----------------------------------+-----------------+--------------+-------------+------------------+
- | filename | file_size_bytes | num_rows |
num_columns | table_size_bytes |
-
+-----------------------------------+-----------------+--------------+-------------+------------------+
- | alltypes_plain.parquet | 1851 | Exact(8) |
11 | Absent |
- | alltypes_tiny_pages.parquet | 454233 | Exact(7300) |
13 | Absent |
- | lz4_raw_compressed_larger.parquet | 380836 | Exact(10000) |
1 | Absent |
-
+-----------------------------------+-----------------+--------------+-------------+------------------+
+
+-----------------------------------+---------------------------+-----------------+--------------+-------------+------------------+
+ | filename | table |
file_size_bytes | num_rows | num_columns | table_size_bytes |
+
+-----------------------------------+---------------------------+-----------------+--------------+-------------+------------------+
+ | alltypes_plain.parquet | alltypes_plain |
1851 | Exact(8) | 11 | Absent |
Review Comment:
Since the file statistics cache is now table-scoped, this should be reflected
in the output.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]