alamb commented on code in PR #7432:
URL: https://github.com/apache/arrow-datafusion/pull/7432#discussion_r1307741977


##########
datafusion/physical-plan/src/filter.rs:
##########
@@ -378,77 +378,20 @@ pub type EqualAndNonEqual<'a> =
 mod tests {
 
     use super::*;
-    use crate::physical_plan::expressions::*;
-    use crate::physical_plan::ExecutionPlan;
-    use crate::physical_plan::{collect, with_new_children_if_necessary};
+    use crate::expressions::*;
     use crate::test;
     use crate::test::exec::StatisticsExec;
-    use crate::test_util;
+    use crate::ExecutionPlan;
     use arrow::datatypes::{DataType, Field, Schema};
-    use datafusion_common::utils::DataPtr;
     use datafusion_common::ColumnStatistics;
     use datafusion_common::ScalarValue;
     use datafusion_expr::Operator;
     use std::iter::Iterator;
     use std::sync::Arc;
-    use tempfile::TempDir;
-
-    #[tokio::test]
-    async fn simple_predicate() -> Result<()> {

Review Comment:
   predicate evaluation in filters is well covered by sqllogictests, and since this test used `CsvExec` (which lives in the datasource crate) I simply removed the test instead of trying to rewrite it
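
   For illustration only: a rough sketch of how such a test could be ported in-crate against generated data (`test::scan_partitioned` is used elsewhere in this PR). This is not part of the change; the test name, column `c1`, its Int64 type, and the predicate are all placeholders, and the items referenced are assumed to be in scope in this test module:

```rust
// Hypothetical sketch only -- not part of this PR. Assumes `binary`,
// `col`, `Literal`, `FilterExec`, `collect`, `TaskContext`, and
// `test::scan_partitioned` resolve via this module's imports.
#[tokio::test]
async fn filter_over_generated_data() -> Result<()> {
    let task_ctx = Arc::new(TaskContext::default());
    // generated in-memory input instead of CsvExec
    let input = test::scan_partitioned(1);
    let schema = input.schema();
    // placeholder predicate: c1 > 0 (column name and type are assumptions)
    let predicate = binary(
        col("c1", &schema)?,
        Operator::Gt,
        Arc::new(Literal::new(ScalarValue::Int64(Some(0)))),
        &schema,
    )?;
    let filter = Arc::new(FilterExec::try_new(predicate, input)?);
    let batches = collect(filter, task_ctx).await?;
    assert!(!batches.is_empty());
    Ok(())
}
```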



##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -502,88 +500,14 @@ mod tests {
         binary(l, op, r, input_schema).unwrap()
     }
 
-    #[tokio::test]
-    async fn project_first_column() -> Result<()> {

Review Comment:
   basic projections are well covered by end-to-end / sqllogic tests -- this test used `CsvExec`, which isn't in the physical-plan crate, so I removed this test rather than trying to port it
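
   For illustration, a ported version could exercise `ProjectionExec` directly against generated data. This sketch is not part of the PR; the column and output names are placeholders:

```rust
// Hypothetical sketch only -- not part of this PR. Assumes `col`,
// `collect`, `TaskContext`, and `test::scan_partitioned` are in scope
// in this test module; the column name `c1` is a placeholder.
#[tokio::test]
async fn project_over_generated_data() -> Result<()> {
    let task_ctx = Arc::new(TaskContext::default());
    let input = test::scan_partitioned(1);
    let schema = input.schema();
    // project a single column under a new name
    let projection = Arc::new(ProjectionExec::try_new(
        vec![(col("c1", &schema)?, "c1_out".to_string())],
        input,
    )?);
    let batches = collect(projection, task_ctx).await?;
    assert!(!batches.is_empty());
    assert_eq!(batches[0].num_columns(), 1);
    Ok(())
}
```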



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -509,143 +442,6 @@ mod tests {
         Ok(())
     }
 
-    #[tokio::test]
-    async fn window_function_with_udaf() -> Result<()> {

Review Comment:
   this is already covered in the end to end tests 
https://github.com/apache/arrow-datafusion/blob/main/datafusion/core/tests/user_defined/user_defined_window_functions.rs
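
   For context, the end-to-end coverage takes roughly this shape; this is a sketch under stated assumptions, not the actual test:

```rust
// Hypothetical sketch of the end-to-end style -- not the actual test.
// Assumes `my_count()` returns the AggregateUDF built with create_udaf
// in the removed `window_function_with_udaf` test below, and that a
// table `t` with an Int64 column `c3` has already been registered.
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

async fn exercise_udaf_over_window(ctx: SessionContext) -> Result<()> {
    // make the UDAF resolvable from SQL
    ctx.register_udaf(my_count());
    let df = ctx
        .sql("SELECT my_count(c3) OVER (ORDER BY c3) FROM t")
        .await?;
    let batches = df.collect().await?;
    assert!(!batches.is_empty());
    Ok(())
}
```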



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -509,143 +442,6 @@ mod tests {
         Ok(())
     }
 
-    #[tokio::test]
-    async fn window_function_with_udaf() -> Result<()> {
-        #[derive(Debug)]
-        struct MyCount(i64);
-
-        impl Accumulator for MyCount {
-            fn state(&self) -> Result<Vec<ScalarValue>> {
-                Ok(vec![ScalarValue::Int64(Some(self.0))])
-            }
-
-            fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-                let array = &values[0];
-                self.0 += (array.len() - array.null_count()) as i64;
-                Ok(())
-            }
-
-            fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-                let counts: &Int64Array = arrow::array::as_primitive_array(&states[0]);
-                if let Some(c) = &arrow::compute::sum(counts) {
-                    self.0 += *c;
-                }
-                Ok(())
-            }
-
-            fn evaluate(&self) -> Result<ScalarValue> {
-                Ok(ScalarValue::Int64(Some(self.0)))
-            }
-
-            fn size(&self) -> usize {
-                std::mem::size_of_val(self)
-            }
-        }
-
-        let my_count = create_udaf(
-            "my_count",
-            vec![DataType::Int64],
-            Arc::new(DataType::Int64),
-            Volatility::Immutable,
-            Arc::new(|_| Ok(Box::new(MyCount(0)))),
-            Arc::new(vec![DataType::Int64]),
-        );
-
-        let task_ctx = Arc::new(TaskContext::default());
-        let tmp_dir = TempDir::new()?;
-        let (input, schema) = create_test_schema(1, tmp_dir.path())?;
-
-        let window_exec = Arc::new(WindowAggExec::try_new(
-            vec![create_window_expr(
-                &WindowFunction::AggregateUDF(Arc::new(my_count)),
-                "my_count".to_owned(),
-                &[col("c3", &schema)?],
-                &[],
-                &[],
-                Arc::new(WindowFrame::new(false)),
-                schema.as_ref(),
-            )?],
-            input,
-            schema.clone(),
-            vec![],
-        )?);
-
-        let result: Vec<RecordBatch> = collect(window_exec, task_ctx).await?;
-        assert_eq!(result.len(), 1);
-
-        let n_schema_fields = schema.fields().len();
-        let columns = result[0].columns();
-
-        let count: &Int64Array = as_primitive_array(&columns[n_schema_fields])?;
-        assert_eq!(count.value(0), 100);
-        assert_eq!(count.value(99), 100);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn window_function() -> Result<()> {

Review Comment:
   This is redundant with the coverage in 
https://github.com/apache/arrow-datafusion/blob/main/datafusion/sqllogictest/test_files/window.slt



##########
datafusion/physical-plan/src/sorts/sort.rs:
##########
@@ -882,69 +882,41 @@ impl ExecutionPlan for SortExec {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec;
-    use crate::physical_plan::collect;
-    use crate::physical_plan::expressions::col;
-    use crate::physical_plan::memory::MemoryExec;
+    use crate::coalesce_partitions::CoalescePartitionsExec;
+    use crate::collect;
+    use crate::expressions::col;
+    use crate::memory::MemoryExec;
     use crate::test;
     use crate::test::assert_is_pending;
    use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec};
     use arrow::array::*;
     use arrow::compute::SortOptions;
     use arrow::datatypes::*;
-    use datafusion_common::cast::{as_primitive_array, as_string_array};
+    use datafusion_common::cast::as_primitive_array;
     use datafusion_execution::config::SessionConfig;
     use datafusion_execution::runtime_env::RuntimeConfig;
     use futures::FutureExt;
     use std::collections::HashMap;
-    use tempfile::TempDir;
 
     #[tokio::test]
     async fn test_in_mem_sort() -> Result<()> {
         let task_ctx = Arc::new(TaskContext::default());
         let partitions = 4;
-        let tmp_dir = TempDir::new()?;
-        let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?;
+        let csv = test::scan_partitioned(partitions);
         let schema = csv.schema();
 
         let sort_exec = Arc::new(SortExec::new(
-            vec![

Review Comment:
   A lot of this is simply copy/paste and relied on the CsvExec unnecessarily. 
I updated the tests to only depend on generated data
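
   For completeness, a sketch of what the generated-data pattern looks like end to end; the details below are assumptions on my part, not code from the PR:

```rust
// Hypothetical sketch of the generated-data pattern -- the sort column
// name `c1` is a placeholder. Assumes the module's imports shown in the
// diff above (col, collect, SortOptions, CoalescePartitionsExec,
// test::scan_partitioned) plus PhysicalSortExpr are in scope.
#[tokio::test]
async fn sort_over_generated_data() -> Result<()> {
    let task_ctx = Arc::new(TaskContext::default());
    let input = test::scan_partitioned(4);
    let schema = input.schema();
    let sort_exec = Arc::new(SortExec::new(
        vec![PhysicalSortExpr {
            expr: col("c1", &schema)?,
            options: SortOptions::default(),
        }],
        // merge the 4 partitions into one before sorting
        Arc::new(CoalescePartitionsExec::new(input)),
    ));
    let batches = collect(sort_exec, task_ctx).await?;
    assert!(!batches.is_empty());
    Ok(())
}
```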


