This is an automated email from the ASF dual-hosted git repository. blaginin pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new d4033eecf3 Migrate core test to insta, part1 (#16324) d4033eecf3 is described below commit d4033eecf32eb0dbd750670b08b1cb3b001c2d14 Author: Ian Lai <108986288+chen-yuan-...@users.noreply.github.com> AuthorDate: Tue Jun 17 19:37:25 2025 +0800 Migrate core test to insta, part1 (#16324) * feat: migrate core test into insta * feat: enhance tests with snapshot assertions and remove unused macros * feat: rewrite snapshot test in explain_analyze_baseline_metrics * fix: update snapshot test to normalize file paths in explain_analyze_baseline_metrics * refactor: simplify snapshot assertions by removing format calls in optimizer tests * feat: revert the original usage of assert_metrics macro in explain_analyze_baseline_metrics --------- Co-authored-by: Cheng-Yuan-Lai <a186235@g,ail.com> Co-authored-by: Ian Lai <ian....@senao.com> Co-authored-by: Dmitrii Blaginin <dmit...@blaginin.me> --- datafusion/core/tests/expr_api/simplification.rs | 65 ++- datafusion/core/tests/optimizer/mod.rs | 68 +-- datafusion/core/tests/sql/aggregates.rs | 1 + datafusion/core/tests/sql/explain_analyze.rs | 533 +++++++++++------------ datafusion/core/tests/sql/mod.rs | 1 - datafusion/core/tests/sql/select.rs | 1 + 6 files changed, 354 insertions(+), 315 deletions(-) diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 91a507bdf7..89651726a6 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -17,6 +17,8 @@ //! This program demonstrates the DataFusion expression simplification API. +use insta::assert_snapshot; + use arrow::array::types::IntervalDayTime; use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; @@ -237,11 +239,15 @@ fn to_timestamp_expr_folded() -> Result<()> { .project(proj)? 
.build()?; - let expected = "Projection: TimestampNanosecond(1599566400000000000, None) AS to_timestamp(Utf8(\"2020-09-08T12:00:00+00:00\"))\ - \n TableScan: test" - .to_string(); - let actual = get_optimized_plan_formatted(plan, &Utc::now()); - assert_eq!(expected, actual); + let formatted = get_optimized_plan_formatted(plan, &Utc::now()); + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r###" + Projection: TimestampNanosecond(1599566400000000000, None) AS to_timestamp(Utf8("2020-09-08T12:00:00+00:00")) + TableScan: test + "### + ); Ok(()) } @@ -262,11 +268,16 @@ fn now_less_than_timestamp() -> Result<()> { // Note that constant folder runs and folds the entire // expression down to a single constant (true) - let expected = "Filter: Boolean(true)\ - \n TableScan: test"; - let actual = get_optimized_plan_formatted(plan, &time); - - assert_eq!(expected, actual); + let formatted = get_optimized_plan_formatted(plan, &time); + let actual = formatted.trim(); + + assert_snapshot!( + actual, + @r###" + Filter: Boolean(true) + TableScan: test + "### + ); Ok(()) } @@ -296,11 +307,16 @@ fn select_date_plus_interval() -> Result<()> { // Note that constant folder runs and folds the entire // expression down to a single constant (true) - let expected = r#"Projection: Date32("2021-01-09") AS to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("IntervalDayTime { days: 123, milliseconds: 0 }") - TableScan: test"#; - let actual = get_optimized_plan_formatted(plan, &time); - - assert_eq!(expected, actual); + let formatted = get_optimized_plan_formatted(plan, &time); + let actual = formatted.trim(); + + assert_snapshot!( + actual, + @r###" + Projection: Date32("2021-01-09") AS to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("IntervalDayTime { days: 123, milliseconds: 0 }") + TableScan: test + "### + ); Ok(()) } @@ -314,10 +330,15 @@ fn simplify_project_scalar_fn() -> Result<()> { // before simplify: power(t.f, 1.0) // after simplify: 
t.f as "power(t.f, 1.0)" - let expected = "Projection: test.f AS power(test.f,Float64(1))\ - \n TableScan: test"; - let actual = get_optimized_plan_formatted(plan, &Utc::now()); - assert_eq!(expected, actual); + let formatter = get_optimized_plan_formatted(plan, &Utc::now()); + let actual = formatter.trim(); + assert_snapshot!( + actual, + @r###" + Projection: test.f AS power(test.f,Float64(1)) + TableScan: test + "### + ); Ok(()) } @@ -337,9 +358,9 @@ fn simplify_scan_predicate() -> Result<()> { // before simplify: t.g = power(t.f, 1.0) // after simplify: t.g = t.f" - let expected = "TableScan: test, full_filters=[g = f]"; - let actual = get_optimized_plan_formatted(plan, &Utc::now()); - assert_eq!(expected, actual); + let formatted = get_optimized_plan_formatted(plan, &Utc::now()); + let actual = formatted.trim(); + assert_snapshot!(actual, @"TableScan: test, full_filters=[g = f]"); Ok(()) } diff --git a/datafusion/core/tests/optimizer/mod.rs b/datafusion/core/tests/optimizer/mod.rs index 2daed4fe36..3b39c9adfa 100644 --- a/datafusion/core/tests/optimizer/mod.rs +++ b/datafusion/core/tests/optimizer/mod.rs @@ -18,6 +18,7 @@ //! Tests for the DataFusion SQL query planner that require functions from the //! datafusion-functions crate. +use insta::assert_snapshot; use std::any::Any; use std::collections::HashMap; use std::sync::Arc; @@ -56,9 +57,14 @@ fn init() { #[test] fn select_arrow_cast() { let sql = "SELECT arrow_cast(1234, 'Float64') as f64, arrow_cast('foo', 'LargeUtf8') as large"; - let expected = "Projection: Float64(1234) AS f64, LargeUtf8(\"foo\") AS large\ - \n EmptyRelation"; - quick_test(sql, expected); + let plan = test_sql(sql).unwrap(); + assert_snapshot!( + plan, + @r#" + Projection: Float64(1234) AS f64, LargeUtf8("foo") AS large + EmptyRelation + "# + ); } #[test] fn timestamp_nano_ts_none_predicates() -> Result<()> { @@ -68,11 +74,15 @@ fn timestamp_nano_ts_none_predicates() -> Result<()> { // a scan should have the now()... 
predicate folded to a single // constant and compared to the column without a cast so it can be // pushed down / pruned - let expected = - "Projection: test.col_int32\ - \n Filter: test.col_ts_nano_none < TimestampNanosecond(1666612093000000000, None)\ - \n TableScan: test projection=[col_int32, col_ts_nano_none]"; - quick_test(sql, expected); + let plan = test_sql(sql).unwrap(); + assert_snapshot!( + plan, + @r" + Projection: test.col_int32 + Filter: test.col_ts_nano_none < TimestampNanosecond(1666612093000000000, None) + TableScan: test projection=[col_int32, col_ts_nano_none] + " + ); Ok(()) } @@ -84,10 +94,15 @@ fn timestamp_nano_ts_utc_predicates() { // a scan should have the now()... predicate folded to a single // constant and compared to the column without a cast so it can be // pushed down / pruned - let expected = - "Projection: test.col_int32\n Filter: test.col_ts_nano_utc < TimestampNanosecond(1666612093000000000, Some(\"+00:00\"))\ - \n TableScan: test projection=[col_int32, col_ts_nano_utc]"; - quick_test(sql, expected); + let plan = test_sql(sql).unwrap(); + assert_snapshot!( + plan, + @r#" + Projection: test.col_int32 + Filter: test.col_ts_nano_utc < TimestampNanosecond(1666612093000000000, Some("+00:00")) + TableScan: test projection=[col_int32, col_ts_nano_utc] + "# + ); } #[test] @@ -95,10 +110,14 @@ fn concat_literals() -> Result<()> { let sql = "SELECT concat(true, col_int32, false, null, 'hello', col_utf8, 12, 3.4) \ AS col FROM test"; - let expected = - "Projection: concat(Utf8(\"true\"), CAST(test.col_int32 AS Utf8), Utf8(\"falsehello\"), test.col_utf8, Utf8(\"123.4\")) AS col\ - \n TableScan: test projection=[col_int32, col_utf8]"; - quick_test(sql, expected); + let plan = test_sql(sql).unwrap(); + assert_snapshot!( + plan, + @r#" + Projection: concat(Utf8("true"), CAST(test.col_int32 AS Utf8), Utf8("falsehello"), test.col_utf8, Utf8("123.4")) AS col + TableScan: test projection=[col_int32, col_utf8] + "# + ); Ok(()) } @@ -107,16 +126,15 @@ 
fn concat_ws_literals() -> Result<()> { let sql = "SELECT concat_ws('-', true, col_int32, false, null, 'hello', col_utf8, 12, '', 3.4) \ AS col FROM test"; - let expected = - "Projection: concat_ws(Utf8(\"-\"), Utf8(\"true\"), CAST(test.col_int32 AS Utf8), Utf8(\"false-hello\"), test.col_utf8, Utf8(\"12--3.4\")) AS col\ - \n TableScan: test projection=[col_int32, col_utf8]"; - quick_test(sql, expected); - Ok(()) -} - -fn quick_test(sql: &str, expected_plan: &str) { let plan = test_sql(sql).unwrap(); - assert_eq!(expected_plan, format!("{plan}")); + assert_snapshot!( + plan, + @r#" + Projection: concat_ws(Utf8("-"), Utf8("true"), CAST(test.col_int32 AS Utf8), Utf8("false-hello"), test.col_utf8, Utf8("12--3.4")) AS col + TableScan: test projection=[col_int32, col_utf8] + "# + ); + Ok(()) } fn test_sql(sql: &str) -> Result<LogicalPlan> { diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 7a5868874b..b705448203 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -19,6 +19,7 @@ use super::*; use datafusion::common::test_util::batches_to_string; use datafusion_catalog::MemTable; use datafusion_common::ScalarValue; +use insta::assert_snapshot; #[tokio::test] async fn csv_query_array_agg_distinct() -> Result<()> { diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 70e94227cf..852b350b27 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -16,6 +16,7 @@ // under the License. 
use super::*; +use insta::assert_snapshot; use rstest::rstest; use datafusion::config::ConfigOptions; @@ -52,6 +53,7 @@ async fn explain_analyze_baseline_metrics() { let formatted = arrow::util::pretty::pretty_format_batches(&results) .unwrap() .to_string(); + println!("Query Output:\n\n{formatted}"); assert_metrics!( @@ -174,69 +176,66 @@ async fn csv_explain_plans() { println!("SQL: {sql}"); // // Verify schema - let expected = vec![ - "Explain [plan_type:Utf8, plan:Utf8]", - " Projection: aggregate_test_100.c1 [c1:Utf8View]", - " Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]", - " TableScan: aggregate_test_100 [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]", - ]; let formatted = plan.display_indent_schema().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r" + Explain [plan_type:Utf8, plan:Utf8] + Projection: aggregate_test_100.c1 [c1:Utf8View] + Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View] + TableScan: aggregate_test_100 [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View] + " ); // // Verify the text format of the plan - let expected = vec![ - "Explain", - " Projection: aggregate_test_100.c1", - " Filter: aggregate_test_100.c2 > Int64(10)", - " TableScan: aggregate_test_100", - ]; let formatted = plan.display_indent().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - 
expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r###" + Explain + Projection: aggregate_test_100.c1 + Filter: aggregate_test_100.c2 > Int64(10) + TableScan: aggregate_test_100 + "### ); // // verify the grahviz format of the plan - let expected = vec![ - "// Begin DataFusion GraphViz Plan,", - "// display it online here: https://dreampuf.github.io/GraphvizOnline", - "", - "digraph {", - " subgraph cluster_1", - " {", - " graph[label=\"LogicalPlan\"]", - " 2[shape=box label=\"Explain\"]", - " 3[shape=box label=\"Projection: aggregate_test_100.c1\"]", - " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", - " 4[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\"]", - " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", - " 5[shape=box label=\"TableScan: aggregate_test_100\"]", - " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - " subgraph cluster_6", - " {", - " graph[label=\"Detailed LogicalPlan\"]", - " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", - " 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]", - " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", - " 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]\"]", - " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", - " 10[shape=box label=\"TableScan: aggregate_test_100\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]\"]", - " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - "}", - "// End DataFusion GraphViz Plan", - ]; let formatted = plan.display_graphviz().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - 
assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r#" + // Begin DataFusion GraphViz Plan, + // display it online here: https://dreampuf.github.io/GraphvizOnline + + digraph { + subgraph cluster_1 + { + graph[label="LogicalPlan"] + 2[shape=box label="Explain"] + 3[shape=box label="Projection: aggregate_test_100.c1"] + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] + 4[shape=box label="Filter: aggregate_test_100.c2 > Int64(10)"] + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] + 5[shape=box label="TableScan: aggregate_test_100"] + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] + } + subgraph cluster_6 + { + graph[label="Detailed LogicalPlan"] + 7[shape=box label="Explain\nSchema: [plan_type:Utf8, plan:Utf8]"] + 8[shape=box label="Projection: aggregate_test_100.c1\nSchema: [c1:Utf8View]"] + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back] + 9[shape=box label="Filter: aggregate_test_100.c2 > Int64(10)\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]"] + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] + 10[shape=box label="TableScan: aggregate_test_100\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]"] + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] + } + } + // End DataFusion GraphViz Plan + "# ); // Optimized logical plan @@ -248,69 +247,67 @@ async fn csv_explain_plans() { assert_eq!(logical_schema, optimized_logical_schema.as_ref()); // // Verify schema - let expected = vec![ - "Explain [plan_type:Utf8, plan:Utf8]", - " Projection: aggregate_test_100.c1 [c1:Utf8View]", - " Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8View, c2:Int8]", - " TableScan: aggregate_test_100 projection=[c1, c2], 
partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8View, c2:Int8]", - ]; let formatted = plan.display_indent_schema().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r" + Explain [plan_type:Utf8, plan:Utf8] + Projection: aggregate_test_100.c1 [c1:Utf8View] + Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8View, c2:Int8] + TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8View, c2:Int8] + " ); // // Verify the text format of the plan - let expected = vec![ - "Explain", - " Projection: aggregate_test_100.c1", - " Filter: aggregate_test_100.c2 > Int8(10)", - " TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]", - ]; let formatted = plan.display_indent().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r###" + Explain + Projection: aggregate_test_100.c1 + Filter: aggregate_test_100.c2 > Int8(10) + TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] + + "### ); // // verify the grahviz format of the plan - let expected = vec![ - "// Begin DataFusion GraphViz Plan,", - "// display it online here: https://dreampuf.github.io/GraphvizOnline", - "", - "digraph {", - " subgraph cluster_1", - " {", - " graph[label=\"LogicalPlan\"]", - " 2[shape=box label=\"Explain\"]", - " 3[shape=box label=\"Projection: aggregate_test_100.c1\"]", - " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", - " 4[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\"]", - " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", - " 5[shape=box 
label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\"]", - " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - " subgraph cluster_6", - " {", - " graph[label=\"Detailed LogicalPlan\"]", - " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", - " 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]", - " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", - " 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\\nSchema: [c1:Utf8View, c2:Int8]\"]", - " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", - " 10[shape=box label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\\nSchema: [c1:Utf8View, c2:Int8]\"]", - " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - "}", - "// End DataFusion GraphViz Plan", - ]; let formatted = plan.display_graphviz().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r#" + // Begin DataFusion GraphViz Plan, + // display it online here: https://dreampuf.github.io/GraphvizOnline + + digraph { + subgraph cluster_1 + { + graph[label="LogicalPlan"] + 2[shape=box label="Explain"] + 3[shape=box label="Projection: aggregate_test_100.c1"] + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] + 4[shape=box label="Filter: aggregate_test_100.c2 > Int8(10)"] + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] + 5[shape=box label="TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]"] + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] + } + subgraph cluster_6 + { + graph[label="Detailed LogicalPlan"] + 7[shape=box label="Explain\nSchema: [plan_type:Utf8, plan:Utf8]"] + 8[shape=box label="Projection: 
aggregate_test_100.c1\nSchema: [c1:Utf8View]"] + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back] + 9[shape=box label="Filter: aggregate_test_100.c2 > Int8(10)\nSchema: [c1:Utf8View, c2:Int8]"] + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] + 10[shape=box label="TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\nSchema: [c1:Utf8View, c2:Int8]"] + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] + } + } + // End DataFusion GraphViz Plan + "# ); // Physical plan @@ -396,69 +393,66 @@ async fn csv_explain_verbose_plans() { // // Verify schema - let expected = vec![ - "Explain [plan_type:Utf8, plan:Utf8]", - " Projection: aggregate_test_100.c1 [c1:Utf8View]", - " Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]", - " TableScan: aggregate_test_100 [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]", - ]; let formatted = dataframe.logical_plan().display_indent_schema().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r" + Explain [plan_type:Utf8, plan:Utf8] + Projection: aggregate_test_100.c1 [c1:Utf8View] + Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View] + TableScan: aggregate_test_100 [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View] + " ); // // Verify the text format of the plan - let expected = vec![ - "Explain", - " Projection: aggregate_test_100.c1", - 
" Filter: aggregate_test_100.c2 > Int64(10)", - " TableScan: aggregate_test_100", - ]; let formatted = dataframe.logical_plan().display_indent().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r###" + Explain + Projection: aggregate_test_100.c1 + Filter: aggregate_test_100.c2 > Int64(10) + TableScan: aggregate_test_100 + "### ); // // verify the grahviz format of the plan - let expected = vec![ - "// Begin DataFusion GraphViz Plan,", - "// display it online here: https://dreampuf.github.io/GraphvizOnline", - "", - "digraph {", - " subgraph cluster_1", - " {", - " graph[label=\"LogicalPlan\"]", - " 2[shape=box label=\"Explain\"]", - " 3[shape=box label=\"Projection: aggregate_test_100.c1\"]", - " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", - " 4[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\"]", - " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", - " 5[shape=box label=\"TableScan: aggregate_test_100\"]", - " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - " subgraph cluster_6", - " {", - " graph[label=\"Detailed LogicalPlan\"]", - " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", - " 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]", - " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", - " 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]\"]", - " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", - " 10[shape=box label=\"TableScan: aggregate_test_100\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, 
c13:Utf8View]\"]", - " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - "}", - "// End DataFusion GraphViz Plan", - ]; let formatted = dataframe.logical_plan().display_graphviz().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r#" + // Begin DataFusion GraphViz Plan, + // display it online here: https://dreampuf.github.io/GraphvizOnline + + digraph { + subgraph cluster_1 + { + graph[label="LogicalPlan"] + 2[shape=box label="Explain"] + 3[shape=box label="Projection: aggregate_test_100.c1"] + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] + 4[shape=box label="Filter: aggregate_test_100.c2 > Int64(10)"] + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] + 5[shape=box label="TableScan: aggregate_test_100"] + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] + } + subgraph cluster_6 + { + graph[label="Detailed LogicalPlan"] + 7[shape=box label="Explain\nSchema: [plan_type:Utf8, plan:Utf8]"] + 8[shape=box label="Projection: aggregate_test_100.c1\nSchema: [c1:Utf8View]"] + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back] + 9[shape=box label="Filter: aggregate_test_100.c2 > Int64(10)\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]"] + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] + 10[shape=box label="TableScan: aggregate_test_100\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]"] + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] + } + } + // End DataFusion GraphViz Plan + "# ); // Optimized logical plan @@ -470,69 +464,66 @@ async fn csv_explain_verbose_plans() { assert_eq!(&logical_schema, optimized_logical_schema.as_ref()); // // 
Verify schema - let expected = vec![ - "Explain [plan_type:Utf8, plan:Utf8]", - " Projection: aggregate_test_100.c1 [c1:Utf8View]", - " Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8View, c2:Int8]", - " TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8View, c2:Int8]", - ]; let formatted = plan.display_indent_schema().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r" + Explain [plan_type:Utf8, plan:Utf8] + Projection: aggregate_test_100.c1 [c1:Utf8View] + Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8View, c2:Int8] + TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8View, c2:Int8] + " ); // // Verify the text format of the plan - let expected = vec![ - "Explain", - " Projection: aggregate_test_100.c1", - " Filter: aggregate_test_100.c2 > Int8(10)", - " TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]", - ]; let formatted = plan.display_indent().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r###" + Explain + Projection: aggregate_test_100.c1 + Filter: aggregate_test_100.c2 > Int8(10) + TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] + "### ); // // verify the grahviz format of the plan - let expected = vec![ - "// Begin DataFusion GraphViz Plan,", - "// display it online here: https://dreampuf.github.io/GraphvizOnline", - "", - "digraph {", - " subgraph cluster_1", - " {", - " graph[label=\"LogicalPlan\"]", - " 2[shape=box label=\"Explain\"]", - " 3[shape=box 
label=\"Projection: aggregate_test_100.c1\"]", - " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", - " 4[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\"]", - " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", - " 5[shape=box label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\"]", - " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - " subgraph cluster_6", - " {", - " graph[label=\"Detailed LogicalPlan\"]", - " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", - " 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]", - " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", - " 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\\nSchema: [c1:Utf8View, c2:Int8]\"]", - " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", - " 10[shape=box label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\\nSchema: [c1:Utf8View, c2:Int8]\"]", - " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", - " }", - "}", - "// End DataFusion GraphViz Plan", - ]; let formatted = plan.display_graphviz().to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + let actual = formatted.trim(); + assert_snapshot!( + actual, + @r#" + // Begin DataFusion GraphViz Plan, + // display it online here: https://dreampuf.github.io/GraphvizOnline + + digraph { + subgraph cluster_1 + { + graph[label="LogicalPlan"] + 2[shape=box label="Explain"] + 3[shape=box label="Projection: aggregate_test_100.c1"] + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] + 4[shape=box label="Filter: aggregate_test_100.c2 > Int8(10)"] + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] + 5[shape=box label="TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > 
Int8(10)]"] + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] + } + subgraph cluster_6 + { + graph[label="Detailed LogicalPlan"] + 7[shape=box label="Explain\nSchema: [plan_type:Utf8, plan:Utf8]"] + 8[shape=box label="Projection: aggregate_test_100.c1\nSchema: [c1:Utf8View]"] + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back] + 9[shape=box label="Filter: aggregate_test_100.c2 > Int8(10)\nSchema: [c1:Utf8View, c2:Int8]"] + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] + 10[shape=box label="TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\nSchema: [c1:Utf8View, c2:Int8]"] + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] + } + } + // End DataFusion GraphViz Plan + "# ); // Physical plan @@ -602,19 +593,6 @@ async fn test_physical_plan_display_indent() { LIMIT 10"; let dataframe = ctx.sql(sql).await.unwrap(); let physical_plan = dataframe.create_physical_plan().await.unwrap(); - let expected = vec![ - "SortPreservingMergeExec: [the_min@2 DESC], fetch=10", - " SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true]", - " ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min]", - " AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000", - " AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]", - " CoalesceBatchesExec: target_batch_size=4096", - " FilterExec: c12@1 < 10", - " RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1, c12], file_type=csv, has_header=true", - ]; let normalizer = ExplainNormalizer::new(); let actual 
= format!("{}", displayable(physical_plan.as_ref()).indent(true)) @@ -622,10 +600,24 @@ async fn test_physical_plan_display_indent() { .lines() // normalize paths .map(|s| normalizer.normalize(s)) - .collect::<Vec<_>>(); - assert_eq!( - expected, actual, - "expected:\n{expected:#?}\nactual:\n\n{actual:#?}\n" + .collect::<Vec<_>>() + .join("\n"); + + assert_snapshot!( + actual, + @r###" + SortPreservingMergeExec: [the_min@2 DESC], fetch=10 + SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min] + AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)] + CoalesceBatchesExec: target_batch_size=4096 + RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000 + AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)] + CoalesceBatchesExec: target_batch_size=4096 + FilterExec: c12@1 < 10 + RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1, c12], file_type=csv, has_header=true + "### ); } @@ -647,19 +639,6 @@ async fn test_physical_plan_display_indent_multi_children() { let dataframe = ctx.sql(sql).await.unwrap(); let physical_plan = dataframe.create_physical_plan().await.unwrap(); - let expected = vec![ - "CoalesceBatchesExec: target_batch_size=4096", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c1@0, c2@0)], projection=[c1@0]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000", - " RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], 
file_type=csv, has_header=true", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=9000", - " RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c2]", - " DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true", - ]; let normalizer = ExplainNormalizer::new(); let actual = format!("{}", displayable(physical_plan.as_ref()).indent(true)) @@ -667,11 +646,24 @@ async fn test_physical_plan_display_indent_multi_children() { .lines() // normalize paths .map(|s| normalizer.normalize(s)) - .collect::<Vec<_>>(); - - assert_eq!( - expected, actual, - "expected:\n{expected:#?}\nactual:\n\n{actual:#?}\n" + .collect::<Vec<_>>() + .join("\n"); + + assert_snapshot!( + actual, + @r###" + CoalesceBatchesExec: target_batch_size=4096 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c1@0, c2@0)], projection=[c1@0] + CoalesceBatchesExec: target_batch_size=4096 + RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000 + RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + CoalesceBatchesExec: target_batch_size=4096 + RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=9000 + RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 + ProjectionExec: expr=[c1@0 as c2] + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + "### ); } @@ -779,14 +771,19 @@ async fn explain_logical_plan_only() { let sql = "EXPLAIN select count(*) from (values ('a', 1, 100), ('a', 2, 150)) as t (c1,c2,c3)"; let actual = execute(&ctx, sql).await; let actual = normalize_vec_for_explain(actual); - - let expected = vec![ - 
vec!["logical_plan", "Projection: count(Int64(1)) AS count(*)\ - \n Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]\ - \n SubqueryAlias: t\ - \n Projection:\ - \n Values: (Utf8(\"a\"), Int64(1), Int64(100)), (Utf8(\"a\"), Int64(2), Int64(150))"]]; - assert_eq!(expected, actual); + let actual = actual.into_iter().map(|r| r.join("\n")).collect::<String>(); + + assert_snapshot!( + actual, + @r#" + logical_plan + Projection: count(Int64(1)) AS count(*) + Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] + SubqueryAlias: t + Projection: + Values: (Utf8("a"), Int64(1), Int64(100)), (Utf8("a"), Int64(2), Int64(150)) + "# + ); } #[tokio::test] @@ -797,14 +794,16 @@ async fn explain_physical_plan_only() { let sql = "EXPLAIN select count(*) from (values ('a', 1, 100), ('a', 2, 150)) as t (c1,c2,c3)"; let actual = execute(&ctx, sql).await; let actual = normalize_vec_for_explain(actual); - - let expected = vec![vec![ - "physical_plan", - "ProjectionExec: expr=[2 as count(*)]\ - \n PlaceholderRowExec\ - \n", - ]]; - assert_eq!(expected, actual); + let actual = actual.into_iter().map(|r| r.join("\n")).collect::<String>(); + + assert_snapshot!( + actual, + @r###" + physical_plan + ProjectionExec: expr=[2 as count(*)] + PlaceholderRowExec + "### + ); } #[tokio::test] diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 2a5597b9fb..e212ee269b 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -34,7 +34,6 @@ use datafusion::{execution::context::SessionContext, physical_plan::displayable} use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::utils::get_available_parallelism; use datafusion_common::{assert_contains, assert_not_contains}; -use insta::assert_snapshot; use object_store::path::Path; use std::fs::File; use std::io::Write; diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index f874dd7c08..0e1210ebb8 100644 --- 
a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -17,6 +17,7 @@ use super::*; use datafusion_common::ScalarValue; +use insta::assert_snapshot; #[tokio::test] async fn test_list_query_parameters() -> Result<()> { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@datafusion.apache.org For additional commands, e-mail: commits-help@datafusion.apache.org