This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 66ef9b92e5 Speed up arrow_statistics test (#10735)
66ef9b92e5 is described below
commit 66ef9b92e523d2c484a5c724e5b1afd66705dd04
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jun 4 10:03:03 2024 -0400
Speed up arrow_statistics test (#10735)
* Speed up arrow_statistics test
* resolve logical conflicts
---
datafusion/core/tests/parquet/arrow_statistics.rs | 286 ++++++++++++++--------
1 file changed, 179 insertions(+), 107 deletions(-)
diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs
b/datafusion/core/tests/parquet/arrow_statistics.rs
index d0182f6c1c..19cc4db4d2 100644
--- a/datafusion/core/tests/parquet/arrow_statistics.rs
+++ b/datafusion/core/tests/parquet/arrow_statistics.rs
@@ -165,9 +165,9 @@ impl TestReader {
}
/// Defines a test case for statistics extraction
-struct Test {
+struct Test<'a> {
/// The parquet file reader
- reader: ParquetRecordBatchReaderBuilder<File>,
+ reader: &'a ParquetRecordBatchReaderBuilder<File>,
expected_min: ArrayRef,
expected_max: ArrayRef,
expected_null_counts: UInt64Array,
@@ -176,7 +176,7 @@ struct Test {
column_name: &'static str,
}
-impl Test {
+impl<'a> Test<'a> {
fn run(self) {
let Self {
reader,
@@ -269,7 +269,7 @@ async fn test_one_row_group_without_null() {
let row_per_group = 20;
let reader = parquet_file_one_column(0, 4, 7, row_per_group);
Test {
- reader,
+ reader: &reader,
// min is 4
expected_min: Arc::new(Int64Array::from(vec![4])),
// max is 6
@@ -289,7 +289,7 @@ async fn test_one_row_group_with_null_and_negative() {
let reader = parquet_file_one_column(2, -1, 5, row_per_group);
Test {
- reader,
+ reader: &reader,
// min is -1
expected_min: Arc::new(Int64Array::from(vec![-1])),
// max is 4
@@ -309,7 +309,7 @@ async fn test_two_row_group_with_null() {
let reader = parquet_file_one_column(2, 4, 17, row_per_group);
Test {
- reader,
+ reader: &reader,
// mins are [4, 14]
expected_min: Arc::new(Int64Array::from(vec![4, 14])),
// maxes are [13, 16]
@@ -329,7 +329,7 @@ async fn test_two_row_groups_with_all_nulls_in_one() {
let reader = parquet_file_one_column(4, -2, 2, row_per_group);
Test {
- reader,
+ reader: &reader,
// mins are [-2, null]
expected_min: Arc::new(Int64Array::from(vec![Some(-2), None])),
// maxes are [1, null]
@@ -355,10 +355,12 @@ async fn test_int_64() {
let reader = TestReader {
scenario: Scenario::Int,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
// mins are [-5, -4, 0, 5]
expected_min: Arc::new(Int64Array::from(vec![-5, -4, 0, 5])),
// maxes are [-1, 0, 4, 9]
@@ -378,10 +380,12 @@ async fn test_int_32() {
let reader = TestReader {
scenario: Scenario::Int,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
// mins are [-5, -4, 0, 5]
expected_min: Arc::new(Int32Array::from(vec![-5, -4, 0, 5])),
// maxes are [-1, 0, 4, 9]
@@ -406,10 +410,12 @@ async fn test_int_16() {
let reader = TestReader {
scenario: Scenario::Int,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
// mins are [-5, -4, 0, 5]
// BUG: not sure why this returns same data but in Int32Array type
even though I debugged and the column's name is "i16" and its data is Int16
// My debugging tells me the bug is either at:
@@ -441,10 +447,12 @@ async fn test_int_8() {
let reader = TestReader {
scenario: Scenario::Int,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
// mins are [-5, -4, 0, 5]
// BUG: not sure why this returns same data but in Int32Array even
though I debugged and the column's name is "i8" and its data is Int8
expected_min: Arc::new(Int8Array::from(vec![-5, -4, 0, 5])), // panic
here because the actual data is Int32Array
@@ -479,12 +487,15 @@ async fn test_timestamp() {
let reader = TestReader {
scenario: Scenario::Timestamps,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
let tz = "Pacific/Efate";
Test {
- reader: reader.build().await,
+ reader: &reader,
+ // mins are [1577840461000000000, 1577840471000000000,
1577841061000000000, 1578704461000000000,]
expected_min: Arc::new(TimestampNanosecondArray::from(vec![
TimestampNanosecondType::parse("2020-01-01T01:01:01"),
TimestampNanosecondType::parse("2020-01-01T01:01:11"),
@@ -506,7 +517,7 @@ async fn test_timestamp() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampNanosecondArray::from(vec![
TimestampNanosecondType::parse("2020-01-01T01:01:01"),
@@ -535,7 +546,7 @@ async fn test_timestamp() {
// micros
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(TimestampMicrosecondArray::from(vec![
TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
TimestampMicrosecondType::parse("2020-01-01T01:01:11"),
@@ -555,7 +566,7 @@ async fn test_timestamp() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampMicrosecondArray::from(vec![
TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
@@ -584,7 +595,7 @@ async fn test_timestamp() {
// millis
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(TimestampMillisecondArray::from(vec![
TimestampMillisecondType::parse("2020-01-01T01:01:01"),
TimestampMillisecondType::parse("2020-01-01T01:01:11"),
@@ -604,7 +615,7 @@ async fn test_timestamp() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampMillisecondArray::from(vec![
TimestampMillisecondType::parse("2020-01-01T01:01:01"),
@@ -633,7 +644,7 @@ async fn test_timestamp() {
// seconds
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(TimestampSecondArray::from(vec![
TimestampSecondType::parse("2020-01-01T01:01:01"),
TimestampSecondType::parse("2020-01-01T01:01:11"),
@@ -653,7 +664,7 @@ async fn test_timestamp() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampSecondArray::from(vec![
TimestampSecondType::parse("2020-01-01T01:01:01"),
@@ -699,12 +710,14 @@ async fn test_timestamp_diff_rg_sizes() {
let reader = TestReader {
scenario: Scenario::Timestamps,
row_per_group: 8, // note that the row group size is 8
- };
+ }
+ .build()
+ .await;
let tz = "Pacific/Efate";
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(TimestampNanosecondArray::from(vec![
TimestampNanosecondType::parse("2020-01-01T01:01:01"),
TimestampNanosecondType::parse("2020-01-01T01:11:01"),
@@ -724,7 +737,7 @@ async fn test_timestamp_diff_rg_sizes() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampNanosecondArray::from(vec![
TimestampNanosecondType::parse("2020-01-01T01:01:01"),
@@ -751,7 +764,7 @@ async fn test_timestamp_diff_rg_sizes() {
// micros
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(TimestampMicrosecondArray::from(vec![
TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
TimestampMicrosecondType::parse("2020-01-01T01:11:01"),
@@ -769,7 +782,7 @@ async fn test_timestamp_diff_rg_sizes() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampMicrosecondArray::from(vec![
TimestampMicrosecondType::parse("2020-01-01T01:01:01"),
@@ -796,7 +809,7 @@ async fn test_timestamp_diff_rg_sizes() {
// millis
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(TimestampMillisecondArray::from(vec![
TimestampMillisecondType::parse("2020-01-01T01:01:01"),
TimestampMillisecondType::parse("2020-01-01T01:11:01"),
@@ -814,7 +827,7 @@ async fn test_timestamp_diff_rg_sizes() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampMillisecondArray::from(vec![
TimestampMillisecondType::parse("2020-01-01T01:01:01"),
@@ -841,7 +854,7 @@ async fn test_timestamp_diff_rg_sizes() {
// seconds
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(TimestampSecondArray::from(vec![
TimestampSecondType::parse("2020-01-01T01:01:01"),
TimestampSecondType::parse("2020-01-01T01:11:01"),
@@ -859,7 +872,7 @@ async fn test_timestamp_diff_rg_sizes() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
TimestampSecondArray::from(vec![
TimestampSecondType::parse("2020-01-01T01:01:01"),
@@ -897,9 +910,12 @@ async fn test_dates_32_diff_rg_sizes() {
let reader = TestReader {
scenario: Scenario::Dates,
row_per_group: 13,
- };
+ }
+ .build()
+ .await;
+
Test {
- reader: reader.build().await,
+ reader: &reader,
// mins are [2020-01-01, 2020-10-30]
expected_min: Arc::new(Date32Array::from(vec![
Date32Type::parse("2020-01-01"),
@@ -920,15 +936,17 @@ async fn test_dates_32_diff_rg_sizes() {
}
#[tokio::test]
-async fn test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
- let reader_time32 = TestReader {
+async fn test_time32_second_diff_rg_sizes() {
+ let reader = TestReader {
scenario: Scenario::Time32Second,
row_per_group: 4,
- };
+ }
+ .build()
+ .await;
// Test for Time32Second column
Test {
- reader: reader_time32.build().await,
+ reader: &reader,
// Assuming specific minimum and maximum values for demonstration
expected_min: Arc::new(Time32SecondArray::from(vec![18506, 18510,
18514, 18518])),
expected_max: Arc::new(Time32SecondArray::from(vec![18509, 18513,
18517, 18521])),
@@ -937,15 +955,20 @@ async fn
test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
column_name: "second",
}
.run();
+}
- let reader_time32_millisecond = TestReader {
+#[tokio::test]
+async fn test_time32_millisecond_diff_rg_sizes() {
+ let reader = TestReader {
scenario: Scenario::Time32Millisecond,
row_per_group: 4,
- };
+ }
+ .build()
+ .await;
// Test for Time32Millisecond column
Test {
- reader: reader_time32_millisecond.build().await,
+ reader: &reader,
// Assuming specific minimum and maximum values for demonstration
expected_min: Arc::new(Time32MillisecondArray::from(vec![
3600000, 3600004, 3600008, 3600012,
@@ -958,15 +981,20 @@ async fn
test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
column_name: "millisecond",
}
.run();
+}
- let reader_time64_micro = TestReader {
+#[tokio::test]
+async fn test_time64_microsecond_diff_rg_sizes() {
+ let reader = TestReader {
scenario: Scenario::Time64Microsecond,
row_per_group: 4,
- };
+ }
+ .build()
+ .await;
// Test for Time64MicroSecond column
Test {
- reader: reader_time64_micro.build().await,
+ reader: &reader,
// Assuming specific minimum and maximum values for demonstration
expected_min: Arc::new(Time64MicrosecondArray::from(vec![
1234567890123,
@@ -985,15 +1013,20 @@ async fn
test_time32_second_and_time64_nanosecond_diff_rg_sizes() {
column_name: "microsecond",
}
.run();
+}
- let reader_time64_nano = TestReader {
+#[tokio::test]
+async fn test_time64_nanosecond_diff_rg_sizes() {
+ let reader = TestReader {
scenario: Scenario::Time64Nanosecond,
row_per_group: 4,
- };
+ }
+ .build()
+ .await;
// Test for Time32Second column
Test {
- reader: reader_time64_nano.build().await,
+ reader: &reader,
// Assuming specific minimum and maximum values for demonstration
expected_min: Arc::new(Time64NanosecondArray::from(vec![
987654321012345,
@@ -1020,9 +1053,11 @@ async fn test_dates_64_diff_rg_sizes() {
let reader = TestReader {
scenario: Scenario::Dates,
row_per_group: 13,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Date64Array::from(vec![
Date64Type::parse("2020-01-01"),
Date64Type::parse("2020-10-30"),
@@ -1050,10 +1085,12 @@ async fn test_uint() {
let reader = TestReader {
scenario: Scenario::UInt,
row_per_group: 4,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt8Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt8Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1063,7 +1100,7 @@ async fn test_uint() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt16Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt16Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1073,7 +1110,7 @@ async fn test_uint() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt32Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt32Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1083,7 +1120,7 @@ async fn test_uint() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt64Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt64Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
@@ -1100,10 +1137,12 @@ async fn test_int32_range() {
let reader = TestReader {
scenario: Scenario::Int32Range,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Int32Array::from(vec![0])),
expected_max: Arc::new(Int32Array::from(vec![300000])),
expected_null_counts: UInt64Array::from(vec![0]),
@@ -1120,10 +1159,12 @@ async fn test_uint32_range() {
let reader = TestReader {
scenario: Scenario::UInt32Range,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt32Array::from(vec![0])),
expected_max: Arc::new(UInt32Array::from(vec![300000])),
expected_null_counts: UInt64Array::from(vec![0]),
@@ -1139,10 +1180,12 @@ async fn test_numeric_limits_unsigned() {
let reader = TestReader {
scenario: Scenario::NumericLimits,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt8Array::from(vec![u8::MIN, 100])),
expected_max: Arc::new(UInt8Array::from(vec![100, u8::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1152,7 +1195,7 @@ async fn test_numeric_limits_unsigned() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt16Array::from(vec![u16::MIN, 100])),
expected_max: Arc::new(UInt16Array::from(vec![100, u16::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1162,7 +1205,7 @@ async fn test_numeric_limits_unsigned() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt32Array::from(vec![u32::MIN, 100])),
expected_max: Arc::new(UInt32Array::from(vec![100, u32::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1172,7 +1215,7 @@ async fn test_numeric_limits_unsigned() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(UInt64Array::from(vec![u64::MIN, 100])),
expected_max: Arc::new(UInt64Array::from(vec![100, u64::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1188,10 +1231,12 @@ async fn test_numeric_limits_signed() {
let reader = TestReader {
scenario: Scenario::NumericLimits,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Int8Array::from(vec![i8::MIN, -100])),
expected_max: Arc::new(Int8Array::from(vec![100, i8::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1201,7 +1246,7 @@ async fn test_numeric_limits_signed() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Int16Array::from(vec![i16::MIN, -100])),
expected_max: Arc::new(Int16Array::from(vec![100, i16::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1211,7 +1256,7 @@ async fn test_numeric_limits_signed() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Int32Array::from(vec![i32::MIN, -100])),
expected_max: Arc::new(Int32Array::from(vec![100, i32::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1221,7 +1266,7 @@ async fn test_numeric_limits_signed() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Int64Array::from(vec![i64::MIN, -100])),
expected_max: Arc::new(Int64Array::from(vec![100, i64::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1237,10 +1282,12 @@ async fn test_numeric_limits_float() {
let reader = TestReader {
scenario: Scenario::NumericLimits,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Float32Array::from(vec![f32::MIN, -100.0])),
expected_max: Arc::new(Float32Array::from(vec![100.0, f32::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1250,7 +1297,7 @@ async fn test_numeric_limits_float() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Float64Array::from(vec![f64::MIN, -100.0])),
expected_max: Arc::new(Float64Array::from(vec![100.0, f64::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1260,7 +1307,7 @@ async fn test_numeric_limits_float() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Float32Array::from(vec![-1.0, -100.0])),
expected_max: Arc::new(Float32Array::from(vec![100.0, -100.0])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1270,7 +1317,7 @@ async fn test_numeric_limits_float() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Float64Array::from(vec![-1.0, -100.0])),
expected_max: Arc::new(Float64Array::from(vec![100.0, -100.0])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
@@ -1287,10 +1334,12 @@ async fn test_float64() {
let reader = TestReader {
scenario: Scenario::Float64,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Float64Array::from(vec![-5.0, -4.0, -0.0,
5.0])),
expected_max: Arc::new(Float64Array::from(vec![-1.0, 0.0, 4.0, 9.0])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
@@ -1307,10 +1356,12 @@ async fn test_float16() {
let reader = TestReader {
scenario: Scenario::Float16,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Float16Array::from(
vec![-5.0, -4.0, -0.0, 5.0]
.into_iter()
@@ -1337,10 +1388,12 @@ async fn test_decimal() {
let reader = TestReader {
scenario: Scenario::Decimal,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
Decimal128Array::from(vec![100, -500, 2000])
.with_precision_and_scale(9, 2)
@@ -1356,15 +1409,20 @@ async fn test_decimal() {
column_name: "decimal_col",
}
.run();
-
+}
+#[tokio::test]
+async fn test_decimal_256() {
// This creates a parquet file of 1 column "decimal256_col" with decimal
data type and precision 9, scale 2
// file has 3 record batches, each has 5 rows. They will be saved into 3
row groups
- let decimal256_reader = TestReader {
+ let reader = TestReader {
scenario: Scenario::Decimal256,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
+
Test {
- reader: decimal256_reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
Decimal256Array::from(vec![
i256::from(100),
@@ -1394,10 +1452,12 @@ async fn test_dictionary() {
let reader = TestReader {
scenario: Scenario::Dictionary,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(StringArray::from(vec!["abc", "aaa"])),
expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])),
expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1407,7 +1467,7 @@ async fn test_dictionary() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(StringArray::from(vec!["abc", "aaa"])),
expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])),
expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1417,7 +1477,7 @@ async fn test_dictionary() {
.run();
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Int64Array::from(vec![-100, 0])),
expected_max: Arc::new(Int64Array::from(vec![0, 100])),
expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1440,11 +1500,13 @@ async fn test_byte() {
let reader = TestReader {
scenario: Scenario::ByteArray,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
// column "name"
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(StringArray::from(vec![
"all frontends",
"mixed",
@@ -1463,7 +1525,7 @@ async fn test_byte() {
// column "service_string"
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(StringArray::from(vec![
"frontend five",
"backend one",
@@ -1489,7 +1551,7 @@ async fn test_byte() {
vec![b"frontend two", b"frontend six", b"backend six"];
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min:
Arc::new(BinaryArray::from(expected_service_binary_min_values)),
expected_max:
Arc::new(BinaryArray::from(expected_service_binary_max_values)),
expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
@@ -1505,7 +1567,7 @@ async fn test_byte() {
let max_input = vec![vec![102, 101, 55], vec![102, 101, 54], vec![98, 101,
56]];
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(
FixedSizeBinaryArray::try_from_iter(min_input.into_iter()).unwrap(),
),
@@ -1525,7 +1587,7 @@ async fn test_byte() {
vec![b"frontend two", b"frontend six", b"backend six"];
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(LargeBinaryArray::from(
expected_service_large_binary_min_values,
)),
@@ -1547,11 +1609,13 @@ async fn test_period_in_column_names() {
let reader = TestReader {
scenario: Scenario::PeriodsInColumnNames,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
// column "name"
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(StringArray::from(vec![
"HTTP GET / DISPATCH",
"HTTP PUT / DISPATCH",
@@ -1570,7 +1634,7 @@ async fn test_period_in_column_names() {
// column "service.name"
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(StringArray::from(vec!["frontend", "backend",
"backend"])),
expected_max: Arc::new(StringArray::from(vec![
"frontend", "frontend", "backend",
@@ -1590,10 +1654,12 @@ async fn test_boolean() {
let reader = TestReader {
scenario: Scenario::Boolean,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(BooleanArray::from(vec![false, false])),
expected_max: Arc::new(BooleanArray::from(vec![true, false])),
expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1615,9 +1681,11 @@ async fn test_struct() {
let reader = TestReader {
scenario: Scenario::StructArray,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(struct_array(vec![(Some(1), Some(6.0),
Some(12.0))])),
expected_max: Arc::new(struct_array(vec![(Some(2), Some(8.5),
Some(14.0))])),
expected_null_counts: UInt64Array::from(vec![0]),
@@ -1633,11 +1701,13 @@ async fn test_utf8() {
let reader = TestReader {
scenario: Scenario::UTF8,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
// test for utf8
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(StringArray::from(vec!["a", "e"])),
expected_max: Arc::new(StringArray::from(vec!["d", "i"])),
expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1648,7 +1718,7 @@ async fn test_utf8() {
// test for large_utf8
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(LargeStringArray::from(vec!["a", "e"])),
expected_max: Arc::new(LargeStringArray::from(vec!["d", "i"])),
expected_null_counts: UInt64Array::from(vec![1, 0]),
@@ -1667,7 +1737,7 @@ async fn test_missing_statistics() {
parquet_file_one_column_stats(0, 4, 7, row_per_group,
EnabledStatistics::None);
Test {
- reader,
+ reader: &reader,
expected_min: Arc::new(Int64Array::from(vec![None])),
expected_max: Arc::new(Int64Array::from(vec![None])),
expected_null_counts: UInt64Array::from(vec![None]),
@@ -1684,9 +1754,11 @@ async fn test_column_not_found() {
let reader = TestReader {
scenario: Scenario::Dates,
row_per_group: 5,
- };
+ }
+ .build()
+ .await;
Test {
- reader: reader.build().await,
+ reader: &reader,
expected_min: Arc::new(Int64Array::from(vec![18262, 18565])),
expected_max: Arc::new(Int64Array::from(vec![18564, 21865])),
expected_null_counts: UInt64Array::from(vec![2, 2]),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]