liukun4515 commented on code in PR #2966:
URL: https://github.com/apache/arrow-datafusion/pull/2966#discussion_r930630167


##########
datafusion/core/src/physical_plan/file_format/parquet.rs:
##########
@@ -1367,6 +1465,202 @@ mod tests {
         );
     }
 
+    #[test]
+    fn row_group_pruning_predicate_decimal_type() {
+        // For the decimal data type, parquet can use `INT32`, `INT64`, 
`BYTE_ARRAY`, `FIXED_LENGTH_BYTE_ARRAY` to
+        // store the data.
+        // In this case, construct four types of statistics to filtered with 
the decimal predication.
+
+        // INT32: c1 > 5, the c1 is decimal(9,2)
+        let expr = col("c1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2)));
+        let schema = Schema::new(vec![Field::new("c1", DataType::Decimal(9, 
2), false)]);
+        let schema_descr = get_test_schema_descr(vec![(
+            "c1",
+            PhysicalType::INT32,
+            Some(LogicalType::Decimal {
+                scale: 2,
+                precision: 9,
+            }),
+            Some(9),
+            Some(2),
+            None,
+        )]);
+        let pruning_predicate =
+            PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
+        let rgm1 = get_row_group_meta_data(
+            &schema_descr,
+            // [1.00, 6.00]
+            // c1 > 5, this row group will be included in the results.
+            vec![ParquetStatistics::int32(
+                Some(100),
+                Some(600),
+                None,
+                0,
+                false,
+            )],
+        );
+        let rgm2 = get_row_group_meta_data(
+            &schema_descr,
+            // [0.1, 0.2]
+            // c1 > 5, this row group will not be included in the results.
+            vec![ParquetStatistics::int32(Some(10), Some(20), None, 0, false)],
+        );
+        let metrics = parquet_file_metrics();
+        assert_eq!(
+            prune_row_groups(&[rgm1, rgm2], None, Some(pruning_predicate), 
&metrics),
+            vec![0]
+        );
+
+        // INT32: c1 > 5, but parquet decimal type has different precision or 
scale to arrow decimal
+        // The decimal of arrow is decimal(9,2), the decimal of parquet is 
decimal(9,0)
+        let expr = col("c1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2)));
+        let schema = Schema::new(vec![Field::new("c1", DataType::Decimal(9, 
2), false)]);
+        // The decimal of parquet is decimal(9,0)
+        let schema_descr = get_test_schema_descr(vec![(
+            "c1",
+            PhysicalType::INT32,
+            Some(LogicalType::Decimal {
+                scale: 0,
+                precision: 9,
+            }),
+            Some(9),
+            Some(0),
+            None,
+        )]);
+        let pruning_predicate =
+            PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
+        let rgm1 = get_row_group_meta_data(
+            &schema_descr,
+            // [100, 600]

Review Comment:
   But the schema of this parquet column  is decimal(9,0), so the actual value 
of `Some(100)` is 100.
   
   The parquet just store the unscaled number, this is define by the parquet 
spec format.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to