alamb commented on code in PR #4255:
URL: https://github.com/apache/arrow-datafusion/pull/4255#discussion_r1027862911
##########
datafusion/core/src/physical_plan/file_format/parquet/page_filter.rs:
##########
@@ -390,16 +405,54 @@ macro_rules! get_min_max_values_for_page_index {
match $self.col_page_indexes {
Index::NONE => None,
Index::INT32(index) => {
- let vec = &index.indexes;
- Some(Arc::new(Int32Array::from_iter(
- vec.iter().map(|x| x.$func().cloned()),
- )))
+ match $self.target_type {
+ // int32 to decimal with the precision and scale
+ Some(DataType::Decimal128(precision, scale)) => {
+ let vec = &index.indexes;
+ let vec: Vec<Option<i128>> = vec
+ .iter()
+ .map(|x| x.min().and_then(|x| Some(*x as i128)))
Review Comment:
I wonder if this should be `x.$func()` rather than `x.min()`?
##########
datafusion/core/src/physical_plan/file_format/parquet/page_filter.rs:
##########
@@ -390,16 +405,54 @@ macro_rules! get_min_max_values_for_page_index {
match $self.col_page_indexes {
Index::NONE => None,
Index::INT32(index) => {
- let vec = &index.indexes;
- Some(Arc::new(Int32Array::from_iter(
- vec.iter().map(|x| x.$func().cloned()),
- )))
+ match $self.target_type {
+ // int32 to decimal with the precision and scale
+ Some(DataType::Decimal128(precision, scale)) => {
+ let vec = &index.indexes;
+ let vec: Vec<Option<i128>> = vec
+ .iter()
+ .map(|x| x.min().and_then(|x| Some(*x as i128)))
+ .collect();
+ if let Ok(arr) = Decimal128Array::from(vec)
+ .with_precision_and_scale(*precision, *scale)
+ {
+ return Some(Arc::new(arr));
+ } else {
+ return None;
+ }
+ }
+ _ => {
+ let vec = &index.indexes;
+ Some(Arc::new(Int32Array::from_iter(
+ vec.iter().map(|x| x.$func().cloned()),
+ )))
+ }
+ }
}
Index::INT64(index) => {
- let vec = &index.indexes;
- Some(Arc::new(Int64Array::from_iter(
- vec.iter().map(|x| x.$func().cloned()),
- )))
+ match $self.target_type {
+ // int64 to decimal with the precision and scale
+ Some(DataType::Decimal128(precision, scale)) => {
+ let vec = &index.indexes;
+ let vec: Vec<Option<i128>> = vec
+ .iter()
+ .map(|x| x.min().and_then(|x| Some(*x as i128)))
Review Comment:
same question here -- should this be `x.$func()` rather than `x.min()`?
##########
datafusion/core/tests/parquet/page_pruning.rs:
##########
@@ -204,3 +222,466 @@ async fn page_index_filter_multi_col() {
let batch = results.next().await.unwrap().unwrap();
assert_eq!(batch.num_rows(), 7300);
}
+
+async fn test_prune(
Review Comment:
This is great coverage -- thanks @Ted-Jiang. It is somewhat repetitive with
the row group pruning but I think that is ok as they are different code paths
##########
datafusion/core/tests/parquet/page_pruning.rs:
##########
@@ -204,3 +222,466 @@ async fn page_index_filter_multi_col() {
let batch = results.next().await.unwrap().unwrap();
assert_eq!(batch.num_rows(), 7300);
}
+
+async fn test_prune(
+ case_data_type: Scenario,
+ sql: &str,
+ expected_errors: Option<usize>,
+ expected_row_pages_pruned: Option<usize>,
+ expected_results: usize,
+) {
+ let output = ContextWithParquet::new(case_data_type, Page)
+ .await
+ .query(sql)
+ .await;
+
+ println!("{}", output.description());
+ assert_eq!(output.predicate_evaluation_errors(), expected_errors);
+ assert_eq!(output.row_pages_pruned(), expected_row_pages_pruned);
+ assert_eq!(
+ output.result_rows,
+ expected_results,
+ "{}",
+ output.description()
+ );
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01T01:01:01.000000000
2020-01-02T01:01:01.000000000
+// page-1 1 2020-01-01T01:01:11.000000000
2020-01-02T01:01:11.000000000
+// page-2 1 2020-01-01T01:11:01.000000000
2020-01-02T01:11:01.000000000
+// page-3 1 2020-01-11T01:01:01.000000000
2020-01-12T01:01:01.000000000
+async fn prune_timestamps_nanos() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01T01:01:01.000000
2020-01-02T01:01:01.000000
+// page-1 1 2020-01-01T01:01:11.000000
2020-01-02T01:01:11.000000
+// page-2 1 2020-01-01T01:11:01.000000
2020-01-02T01:11:01.000000
+// page-3 1 2020-01-11T01:01:01.000000
2020-01-12T01:01:01.000000
+async fn prune_timestamps_micros() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02
01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01T01:01:01.000
2020-01-02T01:01:01.000
+// page-1 1 2020-01-01T01:01:11.000
2020-01-02T01:01:11.000
+// page-2 1 2020-01-01T01:11:01.000
2020-01-02T01:11:01.000
+// page-3 1 2020-01-11T01:01:01.000
2020-01-12T01:01:01.000
+async fn prune_timestamps_millis() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where millis < to_timestamp_millis('2020-01-02
01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 1577840461
1577926861
+// page-1 1 1577840471
1577926871
+// page-2 1 1577841061
1577927461
+// page-3 1 1578704461
1578790861
+
+async fn prune_timestamps_seconds() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02
01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01
2020-01-04
+// page-1 1 2020-01-11
2020-01-14
+// page-2 1 2020-10-27
2020-10-30
+// page-3 1 2029-11-09
2029-11-12
+async fn prune_date32() {
+ test_prune(
+ Scenario::Dates,
+ "SELECT * FROM t where date32 < cast('2020-01-02' as date)",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01
2020-01-04
+// page-1 1 2020-01-11
2020-01-14
+// page-2 1 2020-10-27
2020-10-30
+// page-3 1 2029-11-09
2029-11-12
+async fn prune_date64() {
+ // work around for not being able to cast Date32 to Date64 automatically
+ let date = "2020-01-02"
+ .parse::<chrono::NaiveDate>()
+ .unwrap()
+ .and_time(chrono::NaiveTime::from_hms_opt(0, 0, 0).unwrap());
+ let date = ScalarValue::Date64(Some(date.timestamp_millis()));
+
+ let output = ContextWithParquet::new(Scenario::Dates, Page)
+ .await
+ .query_with_expr(col("date64").lt(lit(date)))
+ .await;
+
+ println!("{}", output.description());
+ // This should prune out groups without error
+ assert_eq!(output.predicate_evaluation_errors(), Some(0));
+ assert_eq!(output.row_pages_pruned(), Some(15));
+ assert_eq!(output.result_rows, 1, "{}", output.description());
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 0 -5
-1
+// page-1 0 -4 0
+// page-2 0 0
4
+// page-3 0 5
9
+async fn prune_int32_lt() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i < 1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+ // result of sql "SELECT * FROM t where i < 1" is same as
+ // "SELECT * FROM t where -i > -1"
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where -i > -1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_int32_eq() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i = 1",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun_and_eq() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where abs(i) = 1 and i = 1",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where abs(i) = 1",
+ Some(0),
+ Some(0),
+ 3,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i+1 = 1",
+ Some(0),
+ Some(0),
+ 2,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr_subtract() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where 1-i > 1",
+ Some(0),
+ Some(0),
+ 9,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 0 -5.0
-1.0
+// page-1 0 -4.0
0.0
+// page-2 0 0.0
4.0
+// page-3 0 5.0
9.0
+async fn prune_f64_lt() {
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where f < 1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where -f > -1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun_and_gt() {
+ // result of sql "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >=
0.1"
+ // only use "f >= 0" to prune
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1",
+ Some(0),
+ Some(2),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun() {
+ // result of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not
supported
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where abs(f-1) <= 0.000001",
+ Some(0),
+ Some(0),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_complex_expr() {
+ // result of sql "SELECT * FROM t where f+1 > 1.1"" is not supported
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where f+1 > 1.1",
+ Some(0),
+ Some(0),
+ 9,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_complex_expr_subtract() {
+ // result of sql "SELECT * FROM t where 1-f > 1" is not supported
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where 1-f > 1",
+ Some(0),
+ Some(0),
+ 9,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 0 -5
-1
+// page-1 0 -4 0
+// page-2 0 0
4
+// page-3 0 5
9
+async fn prune_int32_eq_in_list() {
+ // result of sql "SELECT * FROM t where in (1)"
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i in (1)",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_int32_eq_in_list_negated() {
+ // result of sql "SELECT * FROM t where not in (1)" prune nothing
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i not in (1)",
+ Some(0),
+ Some(0),
+ 19,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_decimal_lt() {
+ // The data type of decimal_col is decimal(9,2)
+ // There are three pages each 5 rows:
+ // [1.00, 6.00], [-5.00,6.00], [20.00,60.00]
+ test_prune(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col < 4",
+ Some(0),
+ Some(5),
+ 6,
+ )
+ .await;
+ // compare with the casted decimal value
+ test_prune(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))",
+ Some(0),
+ Some(5),
+ 8,
+ )
+ .await;
+
+ // The data type of decimal_col is decimal(38,2)
+ test_prune(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col < 4",
+ Some(0),
+ Some(5),
+ 6,
+ )
+ .await;
+ // compare with the casted decimal value
+ test_prune(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))",
+ Some(0),
+ Some(5),
+ 8,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_decimal_eq() {
Review Comment:
it might be worth another test that prunes something other than 5 rows --
maybe `where decimal_col = 30.00` and prunes out the other pages? All of the
tests here seem to prune out only the third page 20.00 -> 60.00
##########
datafusion/core/tests/parquet/page_pruning.rs:
##########
@@ -204,3 +222,466 @@ async fn page_index_filter_multi_col() {
let batch = results.next().await.unwrap().unwrap();
assert_eq!(batch.num_rows(), 7300);
}
+
+async fn test_prune(
+ case_data_type: Scenario,
+ sql: &str,
+ expected_errors: Option<usize>,
+ expected_row_pages_pruned: Option<usize>,
+ expected_results: usize,
+) {
+ let output = ContextWithParquet::new(case_data_type, Page)
+ .await
+ .query(sql)
+ .await;
+
+ println!("{}", output.description());
+ assert_eq!(output.predicate_evaluation_errors(), expected_errors);
+ assert_eq!(output.row_pages_pruned(), expected_row_pages_pruned);
+ assert_eq!(
+ output.result_rows,
+ expected_results,
+ "{}",
+ output.description()
+ );
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01T01:01:01.000000000
2020-01-02T01:01:01.000000000
+// page-1 1 2020-01-01T01:01:11.000000000
2020-01-02T01:01:11.000000000
+// page-2 1 2020-01-01T01:11:01.000000000
2020-01-02T01:11:01.000000000
+// page-3 1 2020-01-11T01:01:01.000000000
2020-01-12T01:01:01.000000000
+async fn prune_timestamps_nanos() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01T01:01:01.000000
2020-01-02T01:01:01.000000
+// page-1 1 2020-01-01T01:01:11.000000
2020-01-02T01:01:11.000000
+// page-2 1 2020-01-01T01:11:01.000000
2020-01-02T01:11:01.000000
+// page-3 1 2020-01-11T01:01:01.000000
2020-01-12T01:01:01.000000
+async fn prune_timestamps_micros() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02
01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01T01:01:01.000
2020-01-02T01:01:01.000
+// page-1 1 2020-01-01T01:01:11.000
2020-01-02T01:01:11.000
+// page-2 1 2020-01-01T01:11:01.000
2020-01-02T01:11:01.000
+// page-3 1 2020-01-11T01:01:01.000
2020-01-12T01:01:01.000
+async fn prune_timestamps_millis() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where millis < to_timestamp_millis('2020-01-02
01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 1577840461
1577926861
+// page-1 1 1577840471
1577926871
+// page-2 1 1577841061
1577927461
+// page-3 1 1578704461
1578790861
+
+async fn prune_timestamps_seconds() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02
01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01
2020-01-04
+// page-1 1 2020-01-11
2020-01-14
+// page-2 1 2020-10-27
2020-10-30
+// page-3 1 2029-11-09
2029-11-12
+async fn prune_date32() {
+ test_prune(
+ Scenario::Dates,
+ "SELECT * FROM t where date32 < cast('2020-01-02' as date)",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 1 2020-01-01
2020-01-04
+// page-1 1 2020-01-11
2020-01-14
+// page-2 1 2020-10-27
2020-10-30
+// page-3 1 2029-11-09
2029-11-12
+async fn prune_date64() {
+ // work around for not being able to cast Date32 to Date64 automatically
+ let date = "2020-01-02"
+ .parse::<chrono::NaiveDate>()
+ .unwrap()
+ .and_time(chrono::NaiveTime::from_hms_opt(0, 0, 0).unwrap());
+ let date = ScalarValue::Date64(Some(date.timestamp_millis()));
+
+ let output = ContextWithParquet::new(Scenario::Dates, Page)
+ .await
+ .query_with_expr(col("date64").lt(lit(date)))
+ .await;
+
+ println!("{}", output.description());
+ // This should prune out groups without error
+ assert_eq!(output.predicate_evaluation_errors(), Some(0));
+ assert_eq!(output.row_pages_pruned(), Some(15));
+ assert_eq!(output.result_rows, 1, "{}", output.description());
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 0 -5
-1
+// page-1 0 -4 0
+// page-2 0 0
4
+// page-3 0 5
9
+async fn prune_int32_lt() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i < 1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+ // result of sql "SELECT * FROM t where i < 1" is same as
+ // "SELECT * FROM t where -i > -1"
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where -i > -1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_int32_eq() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i = 1",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun_and_eq() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where abs(i) = 1 and i = 1",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where abs(i) = 1",
+ Some(0),
+ Some(0),
+ 3,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i+1 = 1",
+ Some(0),
+ Some(0),
+ 2,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr_subtract() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where 1-i > 1",
+ Some(0),
+ Some(0),
+ 9,
+ )
+ .await;
+}
+
+#[tokio::test]
+// null count min
max
+// page-0 0 -5.0
-1.0
+// page-1 0 -4.0
0.0
+// page-2 0 0.0
4.0
+// page-3 0 5.0
9.0
+async fn prune_f64_lt() {
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where f < 1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where -f > -1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun_and_gt() {
+ // result of sql "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >=
0.1"
+ // only use "f >= 0" to prune
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1",
+ Some(0),
+ Some(2),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun() {
+ // result of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not
supported
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where abs(f-1) <= 0.000001",
+ Some(0),
+ Some(0),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_complex_expr() {
+ // result of sql "SELECT * FROM t where f+1 > 1.1"" is not supported
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where f+1 > 1.1",
+ Some(0),
+ Some(0),
+ 9,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
Review Comment:
I wonder if we have to run "type coercion / simplification" on them first?
##########
datafusion/core/src/physical_plan/file_format/parquet/page_filter.rs:
##########
@@ -419,10 +468,37 @@ macro_rules! get_min_max_values_for_page_index {
vec.iter().map(|x| x.$func().cloned()),
)))
}
- Index::INT96(_) | Index::BYTE_ARRAY(_) |
Index::FIXED_LEN_BYTE_ARRAY(_) => {
+ Index::BYTE_ARRAY(index) => {
+ let vec = &index.indexes;
Review Comment:
Adding additional support in a follow-on PR sounds like a good idea to me
-- maybe we can file a ticket to track the work
##########
datafusion/core/src/physical_plan/file_format/parquet/page_filter.rs:
##########
@@ -390,16 +405,54 @@ macro_rules! get_min_max_values_for_page_index {
match $self.col_page_indexes {
Index::NONE => None,
Index::INT32(index) => {
- let vec = &index.indexes;
- Some(Arc::new(Int32Array::from_iter(
- vec.iter().map(|x| x.$func().cloned()),
- )))
+ match $self.target_type {
+ // int32 to decimal with the precision and scale
+ Some(DataType::Decimal128(precision, scale)) => {
+ let vec = &index.indexes;
+ let vec: Vec<Option<i128>> = vec
+ .iter()
+ .map(|x| x.min().and_then(|x| Some(*x as i128)))
+ .collect();
+ if let Ok(arr) = Decimal128Array::from(vec)
+ .with_precision_and_scale(*precision, *scale)
+ {
+ return Some(Arc::new(arr));
+ } else {
+ return None;
+ }
Review Comment:
You might be able to do this more functionally with something like (untested):
```suggestion
Decimal128Array::from(vec)
.with_precision_and_scale(*precision, *scale)
.ok()
.map(|arr| Arc::new(arr))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]