This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 66ef9b92e5 Speed up arrow_statistics test (#10735)
66ef9b92e5 is described below

commit 66ef9b92e523d2c484a5c724e5b1afd66705dd04
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jun 4 10:03:03 2024 -0400

    Speed up arrow_statistics test (#10735)
    
    * Speed up arrow_statistics test
    
    * resolve logical conflicts
---
 datafusion/core/tests/parquet/arrow_statistics.rs | 286 ++++++++++++++--------
 1 file changed, 179 insertions(+), 107 deletions(-)
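
The refactor below changes the Test helper in arrow_statistics.rs to borrow its
ParquetRecordBatchReaderBuilder<File> (reader: &'a ...) rather than own it, so each
TestReader is built once per test function and then shared across many Test { .. }.run()
calls; it also splits some combined tests (for example the time32/time64 one) into
separate #[tokio::test] functions. The sketch below illustrates the build-once,
borrow-everywhere pattern in isolation. It is not the actual DataFusion test harness:
ExpensiveReader, its build() constructor, and the label field are hypothetical stand-ins
for the parquet reader builder.

// Minimal, self-contained sketch of the "build once, borrow everywhere" pattern.
struct ExpensiveReader {
    label: String,
}

impl ExpensiveReader {
    fn build(label: &str) -> Self {
        // Pretend this is the slow part (writing and re-opening a temporary parquet file).
        Self { label: label.to_string() }
    }
}

// Mirrors the patched Test<'a>: it borrows the reader instead of owning it,
// so a single reader can back many assertions.
struct Test<'a> {
    reader: &'a ExpensiveReader,
    column_name: &'static str,
}

impl<'a> Test<'a> {
    fn run(self) {
        println!("checking column {} via {}", self.column_name, self.reader.label);
    }
}

fn main() {
    // Build the expensive reader once per test function...
    let reader = ExpensiveReader::build("int_columns");

    // ...then reuse it for every column checked in that test, which is what the
    // patch does by replacing `reader: reader.build().await` with `reader: &reader`.
    Test { reader: &reader, column_name: "i8" }.run();
    Test { reader: &reader, column_name: "i16" }.run();
    Test { reader: &reader, column_name: "i32" }.run();
}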

diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs
index d0182f6c1c..19cc4db4d2 100644
--- a/datafusion/core/tests/parquet/arrow_statistics.rs
+++ b/datafusion/core/tests/parquet/arrow_statistics.rs
@@ -165,9 +165,9 @@ impl TestReader {
 }
 
 /// Defines a test case for statistics extraction
-struct Test {
+struct Test<'a> {
     /// The parquet file reader
-    reader: ParquetRecordBatchReaderBuilder<File>,
+    reader: &'a ParquetRecordBatchReaderBuilder<File>,
     expected_min: ArrayRef,
     expected_max: ArrayRef,
     expected_null_counts: UInt64Array,
@@ -176,7 +176,7 @@ struct Test {
     column_name: &'static str,
 }
 
-impl Test {
+impl<'a> Test<'a> {
     fn run(self) {
         let Self {
             reader,
@@ -269,7 +269,7 @@ async fn test_one_row_group_without_null() {
     let row_per_group = 20;
     let reader = parquet_file_one_column(0, 4, 7, row_per_group);
     Test {
-        reader,
+        reader: &reader,
         // min is 4
         expected_min: Arc::new(Int64Array::from(vec![4])),
         // max is 6
@@ -289,7 +289,7 @@ async fn test_one_row_group_with_null_and_negative() {
     let reader = parquet_file_one_column(2, -1, 5, row_per_group);
 
     Test {
-        reader,
+        reader: &reader,
         // min is -1
         expected_min: Arc::new(Int64Array::from(vec![-1])),
         // max is 4
@@ -309,7 +309,7 @@ async fn test_two_row_group_with_null() {
     let reader = parquet_file_one_column(2, 4, 17, row_per_group);
 
     Test {
-        reader,
+        reader: &reader,
         // mins are [4, 14]
         expected_min: Arc::new(Int64Array::from(vec![4, 14])),
         // maxes are [13, 16]
@@ -329,7 +329,7 @@ async fn test_two_row_groups_with_all_nulls_in_one() {
     let reader = parquet_file_one_column(4, -2, 2, row_per_group);
 
     Test {
-        reader,
+        reader: &reader,
         // mins are [-2, null]
         expected_min: Arc::new(Int64Array::from(vec![Some(-2), None])),
         // maxes are [1, null]
@@ -355,10 +355,12 @@ async fn test_int_64() {
     let reader = TestReader {
         scenario: Scenario::Int,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         // mins are [-5, -4, 0, 5]
         expected_min: Arc::new(Int64Array::from(vec![-5, -4, 0, 5])),
         // maxes are [-1, 0, 4, 9]
@@ -378,10 +380,12 @@ async fn test_int_32() {
     let reader = TestReader {
         scenario: Scenario::Int,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         // mins are [-5, -4, 0, 5]
         expected_min: Arc::new(Int32Array::from(vec![-5, -4, 0, 5])),
         // maxes are [-1, 0, 4, 9]
@@ -406,10 +410,12 @@ async fn test_int_16() {
     let reader = TestReader {
         scenario: Scenario::Int,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         // mins are [-5, -4, 0, 5]
        // BUG: not sure why this returns same data but in Int32Array type even though I debugged and the columns name is "i16" an its data is Int16
         // My debugging tells me the bug is either at:
@@ -441,10 +447,12 @@ async fn test_int_8() {
     let reader = TestReader {
         scenario: Scenario::Int,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         // mins are [-5, -4, 0, 5]
        // BUG: not sure why this returns same data but in Int32Array even though I debugged and the columns name is "i8" an its data is Int8
        expected_min: Arc::new(Int8Array::from(vec![-5, -4, 0, 5])), // panic here because the actual data is Int32Array
@@ -479,12 +487,15 @@ async fn test_timestamp() {
     let reader = TestReader {
         scenario: Scenario::Timestamps,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     let tz = "Pacific/Efate";
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
+        // mins are [1577840461000000000, 1577840471000000000, 1577841061000000000, 1578704461000000000,]
         expected_min: Arc::new(TimestampNanosecondArray::from(vec![
             TimestampNanosecondType::parse("2020-01-01T01:01:01"),
             TimestampNanosecondType::parse("2020-01-01T01:01:11"),
@@ -506,7 +517,7 @@ async fn test_timestamp() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampNanosecondArray::from(vec![
                 TimestampNanosecondType::parse("2020-01-01T01:01:01"),
@@ -535,7 +546,7 @@ async fn test_timestamp() {
 
     // micros
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(TimestampMicrosecondArray::from(vec![
             TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
             TimestampMicrosecondType::parse("2020-01-01T01:01:11"),
@@ -555,7 +566,7 @@ async fn test_timestamp() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampMicrosecondArray::from(vec![
                 TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
@@ -584,7 +595,7 @@ async fn test_timestamp() {
 
     // millis
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(TimestampMillisecondArray::from(vec![
             TimestampMillisecondType::parse("2020-01-01T01:01:01"),
             TimestampMillisecondType::parse("2020-01-01T01:01:11"),
@@ -604,7 +615,7 @@ async fn test_timestamp() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampMillisecondArray::from(vec![
                 TimestampMillisecondType::parse("2020-01-01T01:01:01"),
@@ -633,7 +644,7 @@ async fn test_timestamp() {
 
     // seconds
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(TimestampSecondArray::from(vec![
             TimestampSecondType::parse("2020-01-01T01:01:01"),
             TimestampSecondType::parse("2020-01-01T01:01:11"),
@@ -653,7 +664,7 @@ async fn test_timestamp() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampSecondArray::from(vec![
                 TimestampSecondType::parse("2020-01-01T01:01:01"),
@@ -699,12 +710,14 @@ async fn test_timestamp_diff_rg_sizes() {
     let reader = TestReader {
         scenario: Scenario::Timestamps,
         row_per_group: 8, // note that the row group size is 8
-    };
+    }
+    .build()
+    .await;
 
     let tz = "Pacific/Efate";
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(TimestampNanosecondArray::from(vec![
             TimestampNanosecondType::parse("2020-01-01T01:01:01"),
             TimestampNanosecondType::parse("2020-01-01T01:11:01"),
@@ -724,7 +737,7 @@ async fn test_timestamp_diff_rg_sizes() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampNanosecondArray::from(vec![
                 TimestampNanosecondType::parse("2020-01-01T01:01:01"),
@@ -751,7 +764,7 @@ async fn test_timestamp_diff_rg_sizes() {
 
     // micros
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(TimestampMicrosecondArray::from(vec![
             TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
             TimestampMicrosecondType::parse("2020-01-01T01:11:01"),
@@ -769,7 +782,7 @@ async fn test_timestamp_diff_rg_sizes() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampMicrosecondArray::from(vec![
                 TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
@@ -796,7 +809,7 @@ async fn test_timestamp_diff_rg_sizes() {
 
     // millis
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(TimestampMillisecondArray::from(vec![
             TimestampMillisecondType::parse("2020-01-01T01:01:01"),
             TimestampMillisecondType::parse("2020-01-01T01:11:01"),
@@ -814,7 +827,7 @@ async fn test_timestamp_diff_rg_sizes() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampMillisecondArray::from(vec![
                 TimestampMillisecondType::parse("2020-01-01T01:01:01"),
@@ -841,7 +854,7 @@ async fn test_timestamp_diff_rg_sizes() {
 
     // seconds
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(TimestampSecondArray::from(vec![
             TimestampSecondType::parse("2020-01-01T01:01:01"),
             TimestampSecondType::parse("2020-01-01T01:11:01"),
@@ -859,7 +872,7 @@ async fn test_timestamp_diff_rg_sizes() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             TimestampSecondArray::from(vec![
                 TimestampSecondType::parse("2020-01-01T01:01:01"),
@@ -897,9 +910,12 @@ async fn test_dates_32_diff_rg_sizes() {
     let reader = TestReader {
         scenario: Scenario::Dates,
         row_per_group: 13,
-    };
+    }
+    .build()
+    .await;
+
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         // mins are [2020-01-01, 2020-10-30]
         expected_min: Arc::new(Date32Array::from(vec![
             Date32Type::parse("2020-01-01"),
@@ -920,15 +936,17 @@ async fn test_dates_32_diff_rg_sizes() {
 }
 
 #[tokio::test]
-async fn test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
-    let reader_time32 = TestReader {
+async fn test_time32_second_diff_rg_sizes() {
+    let reader = TestReader {
         scenario: Scenario::Time32Second,
         row_per_group: 4,
-    };
+    }
+    .build()
+    .await;
 
     // Test for Time32Second column
     Test {
-        reader: reader_time32.build().await,
+        reader: &reader,
         // Assuming specific minimum and maximum values for demonstration
        expected_min: Arc::new(Time32SecondArray::from(vec![18506, 18510, 18514, 18518])),
        expected_max: Arc::new(Time32SecondArray::from(vec![18509, 18513, 18517, 18521])),
@@ -937,15 +955,20 @@ async fn test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
         column_name: "second",
     }
     .run();
+}
 
-    let reader_time32_millisecond = TestReader {
+#[tokio::test]
+async fn test_time32_millisecond_diff_rg_sizes() {
+    let reader = TestReader {
         scenario: Scenario::Time32Millisecond,
         row_per_group: 4,
-    };
+    }
+    .build()
+    .await;
 
     // Test for Time32Millisecond column
     Test {
-        reader: reader_time32_millisecond.build().await,
+        reader: &reader,
         // Assuming specific minimum and maximum values for demonstration
         expected_min: Arc::new(Time32MillisecondArray::from(vec![
             3600000, 3600004, 3600008, 3600012,
@@ -958,15 +981,20 @@ async fn test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
         column_name: "millisecond",
     }
     .run();
+}
 
-    let reader_time64_micro = TestReader {
+#[tokio::test]
+async fn test_time64_microsecond_diff_rg_sizes() {
+    let reader = TestReader {
         scenario: Scenario::Time64Microsecond,
         row_per_group: 4,
-    };
+    }
+    .build()
+    .await;
 
     // Test for Time64MicroSecond column
     Test {
-        reader: reader_time64_micro.build().await,
+        reader: &reader,
         // Assuming specific minimum and maximum values for demonstration
         expected_min: Arc::new(Time64MicrosecondArray::from(vec![
             1234567890123,
@@ -985,15 +1013,20 @@ async fn test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
         column_name: "microsecond",
     }
     .run();
+}
 
-    let reader_time64_nano = TestReader {
+#[tokio::test]
+async fn test_time64_nanosecond_diff_rg_sizes() {
+    let reader = TestReader {
         scenario: Scenario::Time64Nanosecond,
         row_per_group: 4,
-    };
+    }
+    .build()
+    .await;
 
     // Test for Time32Second column
     Test {
-        reader: reader_time64_nano.build().await,
+        reader: &reader,
         // Assuming specific minimum and maximum values for demonstration
         expected_min: Arc::new(Time64NanosecondArray::from(vec![
             987654321012345,
@@ -1020,9 +1053,11 @@ async fn test_dates_64_diff_rg_sizes() {
     let reader = TestReader {
         scenario: Scenario::Dates,
         row_per_group: 13,
-    };
+    }
+    .build()
+    .await;
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Date64Array::from(vec![
             Date64Type::parse("2020-01-01"),
             Date64Type::parse("2020-10-30"),
@@ -1050,10 +1085,12 @@ async fn test_uint() {
     let reader = TestReader {
         scenario: Scenario::UInt,
         row_per_group: 4,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt8Array::from(vec![0, 1, 4, 7, 251])),
         expected_max: Arc::new(UInt8Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1063,7 +1100,7 @@ async fn test_uint() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt16Array::from(vec![0, 1, 4, 7, 251])),
         expected_max: Arc::new(UInt16Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1073,7 +1110,7 @@ async fn test_uint() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt32Array::from(vec![0, 1, 4, 7, 251])),
         expected_max: Arc::new(UInt32Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1083,7 +1120,7 @@ async fn test_uint() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt64Array::from(vec![0, 1, 4, 7, 251])),
         expected_max: Arc::new(UInt64Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1100,10 +1137,12 @@ async fn test_int32_range() {
     let reader = TestReader {
         scenario: Scenario::Int32Range,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Int32Array::from(vec![0])),
         expected_max: Arc::new(Int32Array::from(vec![300000])),
         expected_null_counts: UInt64Array::from(vec![0]),
@@ -1120,10 +1159,12 @@ async fn test_uint32_range() {
     let reader = TestReader {
         scenario: Scenario::UInt32Range,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt32Array::from(vec![0])),
         expected_max: Arc::new(UInt32Array::from(vec![300000])),
         expected_null_counts: UInt64Array::from(vec![0]),
@@ -1139,10 +1180,12 @@ async fn test_numeric_limits_unsigned() {
     let reader = TestReader {
         scenario: Scenario::NumericLimits,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt8Array::from(vec![u8::MIN, 100])),
         expected_max: Arc::new(UInt8Array::from(vec![100, u8::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1152,7 +1195,7 @@ async fn test_numeric_limits_unsigned() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt16Array::from(vec![u16::MIN, 100])),
         expected_max: Arc::new(UInt16Array::from(vec![100, u16::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1162,7 +1205,7 @@ async fn test_numeric_limits_unsigned() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt32Array::from(vec![u32::MIN, 100])),
         expected_max: Arc::new(UInt32Array::from(vec![100, u32::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1172,7 +1215,7 @@ async fn test_numeric_limits_unsigned() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(UInt64Array::from(vec![u64::MIN, 100])),
         expected_max: Arc::new(UInt64Array::from(vec![100, u64::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1188,10 +1231,12 @@ async fn test_numeric_limits_signed() {
     let reader = TestReader {
         scenario: Scenario::NumericLimits,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Int8Array::from(vec![i8::MIN, -100])),
         expected_max: Arc::new(Int8Array::from(vec![100, i8::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1201,7 +1246,7 @@ async fn test_numeric_limits_signed() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Int16Array::from(vec![i16::MIN, -100])),
         expected_max: Arc::new(Int16Array::from(vec![100, i16::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1211,7 +1256,7 @@ async fn test_numeric_limits_signed() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Int32Array::from(vec![i32::MIN, -100])),
         expected_max: Arc::new(Int32Array::from(vec![100, i32::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1221,7 +1266,7 @@ async fn test_numeric_limits_signed() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Int64Array::from(vec![i64::MIN, -100])),
         expected_max: Arc::new(Int64Array::from(vec![100, i64::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1237,10 +1282,12 @@ async fn test_numeric_limits_float() {
     let reader = TestReader {
         scenario: Scenario::NumericLimits,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Float32Array::from(vec![f32::MIN, -100.0])),
         expected_max: Arc::new(Float32Array::from(vec![100.0, f32::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1250,7 +1297,7 @@ async fn test_numeric_limits_float() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Float64Array::from(vec![f64::MIN, -100.0])),
         expected_max: Arc::new(Float64Array::from(vec![100.0, f64::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1260,7 +1307,7 @@ async fn test_numeric_limits_float() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Float32Array::from(vec![-1.0, -100.0])),
         expected_max: Arc::new(Float32Array::from(vec![100.0, -100.0])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1270,7 +1317,7 @@ async fn test_numeric_limits_float() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Float64Array::from(vec![-1.0, -100.0])),
         expected_max: Arc::new(Float64Array::from(vec![100.0, -100.0])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1287,10 +1334,12 @@ async fn test_float64() {
     let reader = TestReader {
         scenario: Scenario::Float64,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
        expected_min: Arc::new(Float64Array::from(vec![-5.0, -4.0, -0.0, 5.0])),
         expected_max: Arc::new(Float64Array::from(vec![-1.0, 0.0, 4.0, 9.0])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
@@ -1307,10 +1356,12 @@ async fn test_float16() {
     let reader = TestReader {
         scenario: Scenario::Float16,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Float16Array::from(
             vec![-5.0, -4.0, -0.0, 5.0]
                 .into_iter()
@@ -1337,10 +1388,12 @@ async fn test_decimal() {
     let reader = TestReader {
         scenario: Scenario::Decimal,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             Decimal128Array::from(vec![100, -500, 2000])
                 .with_precision_and_scale(9, 2)
@@ -1356,15 +1409,20 @@ async fn test_decimal() {
         column_name: "decimal_col",
     }
     .run();
-
+}
+#[tokio::test]
+async fn test_decimal_256() {
    // This creates a parquet file of 1 column "decimal256_col" with decimal data type and precicion 9, scale 2
    // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups
-    let decimal256_reader = TestReader {
+    let reader = TestReader {
         scenario: Scenario::Decimal256,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
+
     Test {
-        reader: decimal256_reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(
             Decimal256Array::from(vec![
                 i256::from(100),
@@ -1394,10 +1452,12 @@ async fn test_dictionary() {
     let reader = TestReader {
         scenario: Scenario::Dictionary,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(StringArray::from(vec!["abc", "aaa"])),
         expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1407,7 +1467,7 @@ async fn test_dictionary() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(StringArray::from(vec!["abc", "aaa"])),
         expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1417,7 +1477,7 @@ async fn test_dictionary() {
     .run();
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Int64Array::from(vec![-100, 0])),
         expected_max: Arc::new(Int64Array::from(vec![0, 100])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1440,11 +1500,13 @@ async fn test_byte() {
     let reader = TestReader {
         scenario: Scenario::ByteArray,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     // column "name"
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(StringArray::from(vec![
             "all frontends",
             "mixed",
@@ -1463,7 +1525,7 @@ async fn test_byte() {
 
     // column "service_string"
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(StringArray::from(vec![
             "frontend five",
             "backend one",
@@ -1489,7 +1551,7 @@ async fn test_byte() {
         vec![b"frontend two", b"frontend six", b"backend six"];
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
        expected_min: Arc::new(BinaryArray::from(expected_service_binary_min_values)),
        expected_max: Arc::new(BinaryArray::from(expected_service_binary_max_values)),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
@@ -1505,7 +1567,7 @@ async fn test_byte() {
    let max_input = vec![vec![102, 101, 55], vec![102, 101, 54], vec![98, 101, 56]];
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
        expected_min: Arc::new(
            FixedSizeBinaryArray::try_from_iter(min_input.into_iter()).unwrap(),
         ),
@@ -1525,7 +1587,7 @@ async fn test_byte() {
         vec![b"frontend two", b"frontend six", b"backend six"];
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(LargeBinaryArray::from(
             expected_service_large_binary_min_values,
         )),
@@ -1547,11 +1609,13 @@ async fn test_period_in_column_names() {
     let reader = TestReader {
         scenario: Scenario::PeriodsInColumnNames,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     // column "name"
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(StringArray::from(vec![
             "HTTP GET / DISPATCH",
             "HTTP PUT / DISPATCH",
@@ -1570,7 +1634,7 @@ async fn test_period_in_column_names() {
 
     // column "service.name"
     Test {
-        reader: reader.build().await,
+        reader: &reader,
        expected_min: Arc::new(StringArray::from(vec!["frontend", "backend", "backend"])),
         expected_max: Arc::new(StringArray::from(vec![
             "frontend", "frontend", "backend",
@@ -1590,10 +1654,12 @@ async fn test_boolean() {
     let reader = TestReader {
         scenario: Scenario::Boolean,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(BooleanArray::from(vec![false, false])),
         expected_max: Arc::new(BooleanArray::from(vec![true, false])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1615,9 +1681,11 @@ async fn test_struct() {
     let reader = TestReader {
         scenario: Scenario::StructArray,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
     Test {
-        reader: reader.build().await,
+        reader: &reader,
        expected_min: Arc::new(struct_array(vec![(Some(1), Some(6.0), Some(12.0))])),
        expected_max: Arc::new(struct_array(vec![(Some(2), Some(8.5), Some(14.0))])),
         expected_null_counts: UInt64Array::from(vec![0]),
@@ -1633,11 +1701,13 @@ async fn test_utf8() {
     let reader = TestReader {
         scenario: Scenario::UTF8,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
 
     // test for utf8
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(StringArray::from(vec!["a", "e"])),
         expected_max: Arc::new(StringArray::from(vec!["d", "i"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1648,7 +1718,7 @@ async fn test_utf8() {
 
     // test for large_utf8
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(LargeStringArray::from(vec!["a", "e"])),
         expected_max: Arc::new(LargeStringArray::from(vec!["d", "i"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1667,7 +1737,7 @@ async fn test_missing_statistics() {
        parquet_file_one_column_stats(0, 4, 7, row_per_group, EnabledStatistics::None);
 
     Test {
-        reader,
+        reader: &reader,
         expected_min: Arc::new(Int64Array::from(vec![None])),
         expected_max: Arc::new(Int64Array::from(vec![None])),
         expected_null_counts: UInt64Array::from(vec![None]),
@@ -1684,9 +1754,11 @@ async fn test_column_not_found() {
     let reader = TestReader {
         scenario: Scenario::Dates,
         row_per_group: 5,
-    };
+    }
+    .build()
+    .await;
     Test {
-        reader: reader.build().await,
+        reader: &reader,
         expected_min: Arc::new(Int64Array::from(vec![18262, 18565])),
         expected_max: Arc::new(Int64Array::from(vec![18564, 21865])),
         expected_null_counts: UInt64Array::from(vec![2, 2]),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
