liukun4515 commented on code in PR #2966:
URL: https://github.com/apache/arrow-datafusion/pull/2966#discussion_r930656305
##########
datafusion/core/src/physical_plan/file_format/parquet.rs:
##########
@@ -1367,6 +1465,202 @@ mod tests {
);
}
+ #[test]
+ fn row_group_pruning_predicate_decimal_type() {
+ // For the decimal data type, parquet can use `INT32`, `INT64`,
`BYTE_ARRAY`, `FIXED_LENGTH_BYTE_ARRAY` to
+ // store the data.
+ // In this case, construct four types of statistics to filtered with
the decimal predication.
+
+ // INT32: c1 > 5, the c1 is decimal(9,2)
+ let expr = col("c1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2)));
+ let schema = Schema::new(vec![Field::new("c1", DataType::Decimal(9,
2), false)]);
+ let schema_descr = get_test_schema_descr(vec![(
+ "c1",
+ PhysicalType::INT32,
+ Some(LogicalType::Decimal {
+ scale: 2,
+ precision: 9,
+ }),
+ Some(9),
+ Some(2),
+ None,
+ )]);
+ let pruning_predicate =
+ PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
+ let rgm1 = get_row_group_meta_data(
+ &schema_descr,
+ // [1.00, 6.00]
+ // c1 > 5, this row group will be included in the results.
+ vec![ParquetStatistics::int32(
+ Some(100),
+ Some(600),
+ None,
+ 0,
+ false,
+ )],
+ );
+ let rgm2 = get_row_group_meta_data(
+ &schema_descr,
+ // [0.1, 0.2]
+ // c1 > 5, this row group will not be included in the results.
+ vec![ParquetStatistics::int32(Some(10), Some(20), None, 0, false)],
+ );
+ let metrics = parquet_file_metrics();
+ assert_eq!(
+ prune_row_groups(&[rgm1, rgm2], None, Some(pruning_predicate),
&metrics),
+ vec![0]
+ );
+
+ // INT32: c1 > 5, but parquet decimal type has different precision or
scale to arrow decimal
Review Comment:
I will change the arrow data type from `decimal(9,2)` to `decimal(5,2)`.
This change will result in a different precision and scale when
comparing/filtering data.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]