alamb commented on code in PR #7425:
URL: https://github.com/apache/arrow-datafusion/pull/7425#discussion_r1306434312
##########
datafusion/core/tests/custom_sources.rs:
##########
@@ -296,3 +300,77 @@ fn contains_empty_exec(plan: Arc<dyn ExecutionPlan>) ->
bool {
contains_empty_exec(Arc::clone(&plan.children()[0]))
}
}
+
+#[tokio::test]
+async fn test_get_scan_files() -> Result<()> {
+ let session_ctx = SessionContext::new();
+ let testdata = parquet_test_data();
+ let path = format!("{testdata}/alltypes_plain.parquet");
+ let options = ParquetReadOptions::new();
+ session_ctx
+ .register_parquet("t", &path, options)
+ .await
+ .unwrap();
+
+ let exec = session_ctx
+ .sql("select * from t")
+ .await
+ .unwrap()
+ .create_physical_plan()
+ .await
+ .unwrap();
+
+ // test PartitionedFileFinder API
+ let scan_files = PartitionedFileFinder::new().find(exec.clone());
+ validate_name(scan_files, "alltypes_plain.parquet");
+
+ // test get_scan_files API
+ validate_name(get_scan_files(exec).unwrap(), "alltypes_plain.parquet");
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_get_scan_files_custom() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let exec = ctx
+ .read_table(Arc::new(CustomTableProvider))
+ .unwrap()
+ .create_physical_plan()
+ .await
+ .unwrap();
+
+ let scan_files = PartitionedFileFinder::new().find(exec.clone());
+ assert!(scan_files.is_empty());
+
+ // with custom finder
+ let scan_files = PartitionedFileFinder::new()
+ .with_finder(|exec| {
Review Comment:
Here is an example of how to find files using a custom datasource / plan.
cc @not-my-profile
##########
datafusion/core/tests/custom_sources.rs:
##########
@@ -296,3 +300,77 @@ fn contains_empty_exec(plan: Arc<dyn ExecutionPlan>) ->
bool {
contains_empty_exec(Arc::clone(&plan.children()[0]))
}
}
+
+#[tokio::test]
+async fn test_get_scan_files() -> Result<()> {
Review Comment:
I think these are better tests, and they show the intended use of
`get_scan_files`.
##########
datafusion/core/src/physical_plan/mod.rs:
##########
@@ -23,7 +23,6 @@ use self::metrics::MetricsSet;
use self::{
coalesce_partitions::CoalescePartitionsExec,
display::DisplayableExecutionPlan,
};
-use crate::datasource::physical_plan::FileScanConfig;
Review Comment:
The point of this PR is to remove this line.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]