alamb commented on code in PR #2966:
URL: https://github.com/apache/arrow-datafusion/pull/2966#discussion_r931353867
##########
datafusion/core/src/physical_plan/file_format/parquet.rs:
##########
@@ -1367,6 +1465,202 @@ mod tests {
);
}
+ #[test]
+ fn row_group_pruning_predicate_decimal_type() {
+ // For the decimal data type, parquet can use `INT32`, `INT64`, `BYTE_ARRAY`,
+ // or `FIXED_LEN_BYTE_ARRAY` to store the data.
+ // In this case, construct four types of statistics to be filtered with the decimal predicate.
+
+ // INT32: c1 > 5, the c1 is decimal(9,2)
+ let expr = col("c1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2)));
+ let schema = Schema::new(vec![Field::new("c1", DataType::Decimal(9,
2), false)]);
+ let schema_descr = get_test_schema_descr(vec![(
+ "c1",
+ PhysicalType::INT32,
+ Some(LogicalType::Decimal {
+ scale: 2,
+ precision: 9,
+ }),
+ Some(9),
+ Some(2),
+ None,
+ )]);
+ let pruning_predicate =
+ PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
+ let rgm1 = get_row_group_meta_data(
+ &schema_descr,
+ // [1.00, 6.00]
+ // c1 > 5, this row group will be included in the results.
+ vec![ParquetStatistics::int32(
+ Some(100),
+ Some(600),
+ None,
+ 0,
+ false,
+ )],
+ );
+ let rgm2 = get_row_group_meta_data(
+ &schema_descr,
+ // [0.1, 0.2]
+ // c1 > 5, this row group will not be included in the results.
+ vec![ParquetStatistics::int32(Some(10), Some(20), None, 0, false)],
+ );
+ let metrics = parquet_file_metrics();
+ assert_eq!(
+ prune_row_groups(&[rgm1, rgm2], None, Some(pruning_predicate),
&metrics),
+ vec![0]
+ );
+
+ // INT32: c1 > 5, but the parquet decimal type has a different precision or scale than the arrow decimal.
+ // The arrow decimal is decimal(9,2); the parquet decimal is decimal(9,0).
+ let expr = col("c1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2)));
+ let schema = Schema::new(vec![Field::new("c1", DataType::Decimal(9,
2), false)]);
+ // The decimal of parquet is decimal(9,0)
+ let schema_descr = get_test_schema_descr(vec![(
+ "c1",
+ PhysicalType::INT32,
+ Some(LogicalType::Decimal {
+ scale: 0,
+ precision: 9,
+ }),
+ Some(9),
+ Some(0),
+ None,
+ )]);
+ let pruning_predicate =
+ PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
+ let rgm1 = get_row_group_meta_data(
+ &schema_descr,
+ // [100, 600]
Review Comment:
> the schema of this parquet column is decimal(9,0), so the actual value of Some(100) is 100.
Got it -- thank you
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org