(datafusion) 01/01: Add statistics integration tests

gabotechs Wed, 11 Feb 2026 07:37:59 -0800

This is an automated email from the ASF dual-hosted git repository.

gabotechs pushed a commit to branch add-statistics-tests
in repository https://gitbox.apache.org/repos/asf/datafusion.git


commit 4cdcdba3f94d0bfe45e670c6b1a296d148e56f04
Author: Gabriel Musat Mestre <[email protected]>
AuthorDate: Wed Feb 11 16:36:43 2026 +0100

    Add statistics integration tests
---
 datafusion/core/tests/core_integration.rs          |    3 +
 .../tests/data/tpcds_call_center_small.parquet     |  Bin 0 -> 11388 bytes
 .../tests/data/tpcds_catalog_page_small.parquet    |  Bin 0 -> 100121 bytes
 .../tests/data/tpcds_catalog_returns_small.parquet |  Bin 0 -> 154260 bytes
 .../tests/data/tpcds_catalog_sales_small.parquet   |  Bin 0 -> 192078 bytes
 .../data/tpcds_customer_address_small.parquet      |  Bin 0 -> 87578 bytes
 .../data/tpcds_customer_demographics_small.parquet |  Bin 0 -> 28897 bytes
 .../core/tests/data/tpcds_customer_small.parquet   |  Bin 0 -> 133120 bytes
 .../core/tests/data/tpcds_date_dim_small.parquet   |  Bin 0 -> 99235 bytes
 .../tpcds_household_demographics_small.parquet     |  Bin 0 -> 15998 bytes
 .../tests/data/tpcds_income_band_small.parquet     |  Bin 0 -> 1265 bytes
 .../core/tests/data/tpcds_inventory_small.parquet  |  Bin 0 -> 16384 bytes
 .../core/tests/data/tpcds_item_small.parquet       |  Bin 0 -> 225137 bytes
 .../core/tests/data/tpcds_promotion_small.parquet  |  Bin 0 -> 16862 bytes
 .../core/tests/data/tpcds_reason_small.parquet     |  Bin 0 -> 1598 bytes
 .../core/tests/data/tpcds_ship_mode_small.parquet  |  Bin 0 -> 2609 bytes
 .../tests/data/tpcds_store_returns_small.parquet   |  Bin 0 -> 114346 bytes
 .../tests/data/tpcds_store_sales_small.parquet     |  Bin 0 -> 126406 bytes
 .../core/tests/data/tpcds_store_small.parquet      |  Bin 0 -> 10998 bytes
 .../core/tests/data/tpcds_time_dim_small.parquet   |  Bin 0 -> 37301 bytes
 .../core/tests/data/tpcds_warehouse_small.parquet  |  Bin 0 -> 4830 bytes
 .../core/tests/data/tpcds_web_page_small.parquet   |  Bin 0 -> 5875 bytes
 .../tests/data/tpcds_web_returns_small.parquet     |  Bin 0 -> 143828 bytes
 .../core/tests/data/tpcds_web_sales_small.parquet  |  Bin 0 -> 193945 bytes
 .../core/tests/data/tpcds_web_site_small.parquet   |  Bin 0 -> 11916 bytes
 datafusion/core/tests/statistics/mod.rs            |  161 +++
 datafusion/core/tests/statistics/tpcds.rs          | 1087 ++++++++++++++++++++
 27 files changed, 1251 insertions(+)

diff --git a/datafusion/core/tests/core_integration.rs 
b/datafusion/core/tests/core_integration.rs
index bdbe722453..f93ef1acf1 100644
--- a/datafusion/core/tests/core_integration.rs
+++ b/datafusion/core/tests/core_integration.rs
@@ -60,6 +60,9 @@ mod catalog_listing;
 /// Run all tests that are found in the `tracing` directory
 mod tracing;
 
+/// Run all tests in the `statistics` directory
+mod statistics;
+
 #[cfg(test)]
 #[ctor::ctor]
 fn init() {
diff --git a/datafusion/core/tests/data/tpcds_call_center_small.parquet 
b/datafusion/core/tests/data/tpcds_call_center_small.parquet
new file mode 100644
index 0000000000..a6579e4f33
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_call_center_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_catalog_page_small.parquet 
b/datafusion/core/tests/data/tpcds_catalog_page_small.parquet
new file mode 100644
index 0000000000..7b25e9fd6f
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_catalog_page_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_catalog_returns_small.parquet 
b/datafusion/core/tests/data/tpcds_catalog_returns_small.parquet
new file mode 100644
index 0000000000..313393f377
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_catalog_returns_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_catalog_sales_small.parquet 
b/datafusion/core/tests/data/tpcds_catalog_sales_small.parquet
new file mode 100644
index 0000000000..2efe857cdf
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_catalog_sales_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_customer_address_small.parquet 
b/datafusion/core/tests/data/tpcds_customer_address_small.parquet
new file mode 100644
index 0000000000..fac4e6f10b
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_customer_address_small.parquet differ
diff --git 
a/datafusion/core/tests/data/tpcds_customer_demographics_small.parquet 
b/datafusion/core/tests/data/tpcds_customer_demographics_small.parquet
new file mode 100644
index 0000000000..243124ffed
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_customer_demographics_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_customer_small.parquet 
b/datafusion/core/tests/data/tpcds_customer_small.parquet
new file mode 100644
index 0000000000..7c43defdf8
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_customer_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_date_dim_small.parquet 
b/datafusion/core/tests/data/tpcds_date_dim_small.parquet
new file mode 100644
index 0000000000..7b9ce3b812
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_date_dim_small.parquet differ
diff --git 
a/datafusion/core/tests/data/tpcds_household_demographics_small.parquet 
b/datafusion/core/tests/data/tpcds_household_demographics_small.parquet
new file mode 100644
index 0000000000..e53297b6f2
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_household_demographics_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_income_band_small.parquet 
b/datafusion/core/tests/data/tpcds_income_band_small.parquet
new file mode 100644
index 0000000000..db1838ffad
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_income_band_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_inventory_small.parquet 
b/datafusion/core/tests/data/tpcds_inventory_small.parquet
new file mode 100644
index 0000000000..074757467a
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_inventory_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_item_small.parquet 
b/datafusion/core/tests/data/tpcds_item_small.parquet
new file mode 100644
index 0000000000..b57ed6ed12
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_item_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_promotion_small.parquet 
b/datafusion/core/tests/data/tpcds_promotion_small.parquet
new file mode 100644
index 0000000000..31ffe383bd
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_promotion_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_reason_small.parquet 
b/datafusion/core/tests/data/tpcds_reason_small.parquet
new file mode 100644
index 0000000000..1b41b04e8b
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_reason_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_ship_mode_small.parquet 
b/datafusion/core/tests/data/tpcds_ship_mode_small.parquet
new file mode 100644
index 0000000000..1d41ec4ada
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_ship_mode_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_store_returns_small.parquet 
b/datafusion/core/tests/data/tpcds_store_returns_small.parquet
new file mode 100644
index 0000000000..eb9c0de10e
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_store_returns_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_store_sales_small.parquet 
b/datafusion/core/tests/data/tpcds_store_sales_small.parquet
new file mode 100644
index 0000000000..670b613ec8
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_store_sales_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_store_small.parquet 
b/datafusion/core/tests/data/tpcds_store_small.parquet
new file mode 100644
index 0000000000..ad1e234cc2
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_store_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_time_dim_small.parquet 
b/datafusion/core/tests/data/tpcds_time_dim_small.parquet
new file mode 100644
index 0000000000..05cb57133c
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_time_dim_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_warehouse_small.parquet 
b/datafusion/core/tests/data/tpcds_warehouse_small.parquet
new file mode 100644
index 0000000000..856d07a2fc
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_warehouse_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_web_page_small.parquet 
b/datafusion/core/tests/data/tpcds_web_page_small.parquet
new file mode 100644
index 0000000000..f350a2da2c
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_web_page_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_web_returns_small.parquet 
b/datafusion/core/tests/data/tpcds_web_returns_small.parquet
new file mode 100644
index 0000000000..2b50c3932f
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_web_returns_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_web_sales_small.parquet 
b/datafusion/core/tests/data/tpcds_web_sales_small.parquet
new file mode 100644
index 0000000000..a4a62bfe24
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_web_sales_small.parquet differ
diff --git a/datafusion/core/tests/data/tpcds_web_site_small.parquet 
b/datafusion/core/tests/data/tpcds_web_site_small.parquet
new file mode 100644
index 0000000000..ee9c79ce9b
Binary files /dev/null and 
b/datafusion/core/tests/data/tpcds_web_site_small.parquet differ
diff --git a/datafusion/core/tests/statistics/mod.rs 
b/datafusion/core/tests/statistics/mod.rs
new file mode 100644
index 0000000000..0a23cce156
--- /dev/null
+++ b/datafusion/core/tests/statistics/mod.rs
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::common::Result;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_common::Statistics;
+use datafusion_common::stats::Precision;
+use datafusion_physical_expr_common::metrics::MetricValue;
+use std::fmt::{Debug, Display, Formatter};
+use std::sync::Arc;
+
+mod tpcds;
+
+#[derive(Debug, Default, Copy, Clone)]
+struct StatsVsMetricsDisplayOptions {
+    display_output_rows: bool,
+    display_output_bytes: bool,
+}
+
+struct Node {
+    name: String,
+    stats: Statistics,
+    output_rows: Option<usize>,
+    output_bytes: Option<usize>,
+    children: Vec<Node>,
+    opts: StatsVsMetricsDisplayOptions,
+}
+
+impl Debug for Node {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        fn fmt(f: &mut Formatter<'_>, node: &Node, depth: usize) -> 
std::fmt::Result {
+            for _ in 0..depth {
+                write!(f, "  ")?;
+            }
+            write!(f, "{}:", node.name)?;
+            fn display_opt<T: Display>(opt: Option<T>) -> impl Display {
+                match opt {
+                    None => "?".to_string(),
+                    Some(v) => v.to_string(),
+                }
+            }
+            if node.opts.display_output_bytes {
+                write!(
+                    f,
+                    " output_bytes={} vs {} ({}%)",
+                    node.stats.total_byte_size,
+                    display_opt(node.output_bytes),
+                    display_opt(accuracy_percent(
+                        node.stats.total_byte_size,
+                        node.output_bytes
+                    ))
+                )?;
+            }
+            if node.opts.display_output_rows {
+                write!(
+                    f,
+                    " output_bytes={} vs {} ({}%)",
+                    node.stats.num_rows,
+                    display_opt(node.output_rows),
+                    display_opt(accuracy_percent(node.stats.num_rows, 
node.output_rows))
+                )?;
+            }
+            writeln!(f)?;
+            for c in &node.children {
+                fmt(f, c, depth + 1)?;
+            }
+            Ok(())
+        }
+
+        fmt(f, self, 0)
+    }
+}
+
+impl Node {
+    fn from_plan(
+        plan: &Arc<dyn ExecutionPlan>,
+        opts: StatsVsMetricsDisplayOptions,
+    ) -> Result<Self> {
+        let mut children = vec![];
+        for child in plan.children() {
+            children.push(Node::from_plan(child, opts)?);
+        }
+
+        let mut node = Node {
+            name: plan.name().to_string(),
+            stats: plan.partition_statistics(None)?,
+            output_rows: None,
+            output_bytes: None,
+            children,
+            opts,
+        };
+        if let Some(metrics) = plan.metrics() {
+            node.output_rows = metrics.output_rows();
+            node.output_bytes = metrics
+                .sum(|v| matches!(v.value(), MetricValue::OutputBytes(_)))
+                .map(|v| v.as_usize());
+        }
+
+        Ok(node)
+    }
+
+    fn avg_row_accuracy(&self) -> usize {
+        fn collect_accuracy(node: &Node) -> Vec<usize> {
+            let mut results = vec![];
+            for child in &node.children {
+                results.extend(collect_accuracy(child));
+            }
+            if let Some(accuracy) =
+                accuracy_percent(node.stats.num_rows, node.output_rows)
+            {
+                results.push(accuracy);
+            }
+            results
+        }
+        let accuracy = collect_accuracy(self);
+        accuracy.iter().sum::<usize>() / accuracy.len()
+    }
+
+    fn avg_byte_accuracy(&self) -> usize {
+        fn collect_accuracy(node: &Node) -> Vec<usize> {
+            let mut results = vec![];
+            for child in &node.children {
+                results.extend(collect_accuracy(child));
+            }
+            if let Some(accuracy) =
+                accuracy_percent(node.stats.total_byte_size, node.output_bytes)
+            {
+                results.push(accuracy);
+            }
+            results
+        }
+        let accuracy = collect_accuracy(self);
+        accuracy.iter().sum::<usize>() / accuracy.len()
+    }
+}
+
+fn accuracy_percent(estimated: Precision<usize>, actual: Option<usize>) -> 
Option<usize> {
+    match (estimated.get_value(), actual) {
+        (Some(estimated), Some(actual)) => {
+            let err = (100 * estimated.abs_diff(actual)) / 
actual.max(*estimated).max(1);
+            Some(100 - err)
+        }
+        (Some(_estimated), None) => None,
+        (None, Some(_actual)) => Some(0),
+        (None, None) => None,
+    }
+}
diff --git a/datafusion/core/tests/statistics/tpcds.rs 
b/datafusion/core/tests/statistics/tpcds.rs
new file mode 100644
index 0000000000..ebc238fc23
--- /dev/null
+++ b/datafusion/core/tests/statistics/tpcds.rs
@@ -0,0 +1,1087 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::statistics::StatsVsMetricsDisplayOptions;
+use datafusion::common::Result;
+use datafusion::prelude::{ParquetReadOptions, SessionContext};
+use datafusion_physical_plan::collect;
+use std::fs;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::statistics::Node;
+    use insta::assert_snapshot;
+    use std::fmt::{Display, Formatter};
+
+    #[tokio::test]
+    async fn tpcds_1() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(1).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=20%
+        byte_estimation_accuracy=8%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_2() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(2).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=34%
+        byte_estimation_accuracy=17%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_3() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(3).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=44%
+        byte_estimation_accuracy=10%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_4() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(4).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=22%
+        byte_estimation_accuracy=4%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_5() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(5).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=21%
+        byte_estimation_accuracy=12%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_6() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(6).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=28%
+        byte_estimation_accuracy=5%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_7() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(7).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=34%
+        byte_estimation_accuracy=4%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_8() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(8).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=34%
+        byte_estimation_accuracy=2%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_9() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(9).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=58%
+        byte_estimation_accuracy=32%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_10() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(10).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=33%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_11() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(11).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=23%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_12() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(12).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=22%
+        byte_estimation_accuracy=5%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_13() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(13).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=58%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_14() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(14).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=37%
+        byte_estimation_accuracy=16%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_15() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(15).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=15%
+        byte_estimation_accuracy=8%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_16() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(16).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=24%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_17() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(17).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=11%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_18() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(18).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=36%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_19() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(19).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=36%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_20() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(20).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=22%
+        byte_estimation_accuracy=5%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_21() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(21).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=31%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_22() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(22).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=18%
+        byte_estimation_accuracy=2%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_23() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(23).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=19%
+        byte_estimation_accuracy=12%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_24() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(24).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=33%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_25() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(25).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=18%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_26() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(26).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=31%
+        byte_estimation_accuracy=2%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_27() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(27).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=20%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_28() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(28).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=73%
+        byte_estimation_accuracy=40%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_29() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(29).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=25%
+        byte_estimation_accuracy=8%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_30() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(30).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=26%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_31() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(31).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=18%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_32() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(32).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=27%
+        byte_estimation_accuracy=13%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_33() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(33).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=41%
+        byte_estimation_accuracy=12%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_34() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(34).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=17%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_35() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(35).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=33%
+        byte_estimation_accuracy=8%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_36() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(36).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=10%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_37() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(37).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=27%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_38() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(38).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=27%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_39() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(39).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=15%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_40() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(40).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=35%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_41() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(41).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=33%
+        byte_estimation_accuracy=0%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_42() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(42).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=26%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_43() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(43).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=23%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_44() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(44).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=20%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_45() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(45).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=20%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_46() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(46).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=27%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_47() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(47).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=20%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_48() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(48).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=64%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_49() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(49).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=19%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_50() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(50).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=14%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_51() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(51).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=53%
+        byte_estimation_accuracy=17%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_52() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(52).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=25%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_53() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(53).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=17%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_54() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(54).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=35%
+        byte_estimation_accuracy=10%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_55() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(55).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=36%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_56() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(56).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=25%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_57() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(57).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=17%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_58() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(58).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=16%
+        byte_estimation_accuracy=11%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_59() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(59).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=29%
+        byte_estimation_accuracy=5%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_60() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(60).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=30%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_61() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(61).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=46%
+        byte_estimation_accuracy=13%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_62() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(62).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=52%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_63() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(63).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=17%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_64() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(64).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=35%
+        byte_estimation_accuracy=19%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_65() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(65).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=57%
+        byte_estimation_accuracy=8%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_66() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(66).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=28%
+        byte_estimation_accuracy=4%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_67() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(67).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=27%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_68() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(68).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=21%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_69() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(69).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=28%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_70() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(70).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=42%
+        byte_estimation_accuracy=4%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_71() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(71).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=16%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_72() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(72).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=40%
+        byte_estimation_accuracy=11%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_73() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(73).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=17%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_74() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(74).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=23%
+        byte_estimation_accuracy=5%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_75() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(75).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=28%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_76() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(76).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=23%
+        byte_estimation_accuracy=10%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_77() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(77).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=9%
+        byte_estimation_accuracy=11%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_78() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(78).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=38%
+        byte_estimation_accuracy=22%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_79() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(79).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=24%
+        byte_estimation_accuracy=4%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_80() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(80).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=30%
+        byte_estimation_accuracy=10%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_81() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(81).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=21%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_82() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(82).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=27%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_83() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(83).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=16%
+        byte_estimation_accuracy=10%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_84() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(84).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=36%
+        byte_estimation_accuracy=14%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_85() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(85).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=41%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_86() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(86).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=19%
+        byte_estimation_accuracy=6%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_87() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(87).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=26%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_88() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(88).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=60%
+        byte_estimation_accuracy=19%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_89() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(89).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=22%
+        byte_estimation_accuracy=3%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    #[ignore = "Error: ArrowError(DivideByZero, Some(\"\"))"]
+    async fn tpcds_90() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(90).await?;
+        assert_snapshot!(display, @"");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_91() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(91).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=38%
+        byte_estimation_accuracy=7%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_92() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(92).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=29%
+        byte_estimation_accuracy=15%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_93() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(93).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=41%
+        byte_estimation_accuracy=9%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_94() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(94).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=43%
+        byte_estimation_accuracy=12%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_95() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(95).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=42%
+        byte_estimation_accuracy=17%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_96() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(96).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=59%
+        byte_estimation_accuracy=15%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_97() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(97).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=55%
+        byte_estimation_accuracy=10%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_98() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(98).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=22%
+        byte_estimation_accuracy=4%
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn tpcds_99() -> Result<()> {
+        let display = tpcds_stats_vs_metrics_display(99).await?;
+        assert_snapshot!(display, @r"
+        row_estimation_accuracy=44%
+        byte_estimation_accuracy=4%
+        ");
+        Ok(())
+    }
+
+    struct AccuracyResult {
+        row_estimation_accuracy: usize,
+        byte_estimation_accuracy: usize,
+    }
+
+    impl Display for AccuracyResult {
+        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+            writeln!(
+                f,
+                "row_estimation_accuracy={}%",
+                self.row_estimation_accuracy
+            )?;
+            writeln!(
+                f,
+                "byte_estimation_accuracy={}%",
+                self.byte_estimation_accuracy
+            )
+        }
+    }
+
+    async fn tpcds_stats_vs_metrics_display(query_no: usize) -> 
Result<AccuracyResult> {
+        let filename = format!("tests/tpc-ds/{query_no}.sql");
+        let sql = fs::read_to_string(filename)?;
+
+        let ctx = small_tpcds_ctx().await;
+        let mut df = None;
+        for query in sql.split(';').filter(|s| !s.trim().is_empty()) {
+            df = Some(ctx.sql(query).await?);
+        }
+        let df = df.unwrap();
+        let plan = df.create_physical_plan().await?;
+        collect(plan.clone(), ctx.task_ctx()).await?;
+        let node = Node::from_plan(
+            &plan,
+            StatsVsMetricsDisplayOptions {
+                display_output_bytes: true,
+                display_output_rows: true,
+            },
+        )?;
+        println!("{node:?}");
+        Ok(AccuracyResult {
+            row_estimation_accuracy: node.avg_row_accuracy(),
+            byte_estimation_accuracy: node.avg_byte_accuracy(),
+        })
+    }
+
+    async fn small_tpcds_ctx() -> SessionContext {
+        let ctx = SessionContext::new();
+        for entry in fs::read_dir("tests/data").expect("could not read 
tests/data dir") {
+            let path = entry
+                .expect("could not get entry from tests/data dir")
+                .path();
+            let file_name = path.file_name().unwrap().to_str().unwrap();
+            if file_name.starts_with("tpcds") {
+                let table_name = file_name
+                    .trim_start_matches("tpcds_")
+                    .trim_end_matches("_small.parquet");
+                ctx.register_parquet(
+                    table_name,
+                    path.to_str().unwrap(),
+                    ParquetReadOptions::default(),
+                )
+                .await
+                .expect("Could not register parquet file as table");
+            }
+        }
+        ctx
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) 01/01: Add statistics integration tests

Reply via email to