alamb commented on code in PR #4255:
URL: https://github.com/apache/arrow-datafusion/pull/4255#discussion_r1028151323


##########
datafusion/core/tests/parquet/page_pruning.rs:
##########
@@ -204,3 +222,466 @@ async fn page_index_filter_multi_col() {
     let batch = results.next().await.unwrap().unwrap();
     assert_eq!(batch.num_rows(), 7300);
 }
+
+async fn test_prune(
+    case_data_type: Scenario,
+    sql: &str,
+    expected_errors: Option<usize>,
+    expected_row_pages_pruned: Option<usize>,
+    expected_results: usize,
+) {
+    let output = ContextWithParquet::new(case_data_type, Page)
+        .await
+        .query(sql)
+        .await;
+
+    println!("{}", output.description());
+    assert_eq!(output.predicate_evaluation_errors(), expected_errors);
+    assert_eq!(output.row_pages_pruned(), expected_row_pages_pruned);
+    assert_eq!(
+        output.result_rows,
+        expected_results,
+        "{}",
+        output.description()
+    );
+}
+
+#[tokio::test]
+//                       null count  min                                       max
+// page-0                         1  2020-01-01T01:01:01.000000000             2020-01-02T01:01:01.000000000
+// page-1                         1  2020-01-01T01:01:11.000000000             2020-01-02T01:01:11.000000000
+// page-2                         1  2020-01-01T01:11:01.000000000             2020-01-02T01:11:01.000000000
+// page-3                         1  2020-01-11T01:01:01.000000000             2020-01-12T01:01:01.000000000
+async fn prune_timestamps_nanos() {
+    test_prune(
+        Scenario::Timestamps,
+        "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')",
+        Some(0),
+        Some(5),
+        10,
+    )
+    .await;
+}
+
+#[tokio::test]
+//                         null count  min                                       max
+// page-0                         1  2020-01-01T01:01:01.000000                2020-01-02T01:01:01.000000
+// page-1                         1  2020-01-01T01:01:11.000000                2020-01-02T01:01:11.000000
+// page-2                         1  2020-01-01T01:11:01.000000                2020-01-02T01:11:01.000000
+// page-3                         1  2020-01-11T01:01:01.000000                2020-01-12T01:01:01.000000
+async fn prune_timestamps_micros() {
+    test_prune(
+        Scenario::Timestamps,
+        "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 01:01:11Z')",
+        Some(0),
+        Some(5),
+        10,
+    )
+    .await;
+}
+
+#[tokio::test]
+//                      null count  min                                       max
+// page-0                         1  2020-01-01T01:01:01.000                   2020-01-02T01:01:01.000
+// page-1                         1  2020-01-01T01:01:11.000                   2020-01-02T01:01:11.000
+// page-2                         1  2020-01-01T01:11:01.000                   2020-01-02T01:11:01.000
+// page-3                         1  2020-01-11T01:01:01.000                   2020-01-12T01:01:01.000
+async fn prune_timestamps_millis() {
+    test_prune(
+        Scenario::Timestamps,
+        "SELECT * FROM t where millis < to_timestamp_millis('2020-01-02 01:01:11Z')",
+        Some(0),
+        Some(5),
+        10,
+    )
+    .await;
+}
+
+#[tokio::test]
+//                      null count  min                                       max
+// page-0                         1  1577840461                                1577926861
+// page-1                         1  1577840471                                1577926871
+// page-2                         1  1577841061                                1577927461
+// page-3                         1  1578704461                                1578790861
+
+async fn prune_timestamps_seconds() {
+    test_prune(
+        Scenario::Timestamps,
+        "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02 01:01:11Z')",
+        Some(0),
+        Some(5),
+        10,
+    )
+    .await;
+}
+
+#[tokio::test]
+//                       null count  min                                       max
+// page-0                         1  2020-01-01                                2020-01-04
+// page-1                         1  2020-01-11                                2020-01-14
+// page-2                         1  2020-10-27                                2020-10-30
+// page-3                         1  2029-11-09                                2029-11-12
+async fn prune_date32() {
+    test_prune(
+        Scenario::Dates,
+        "SELECT * FROM t where date32 < cast('2020-01-02' as date)",
+        Some(0),
+        Some(15),
+        1,
+    )
+    .await;
+}
+
+#[tokio::test]
+//                      null count  min                                       max
+// page-0                         1  2020-01-01                                2020-01-04
+// page-1                         1  2020-01-11                                2020-01-14
+// page-2                         1  2020-10-27                                2020-10-30
+// page-3                         1  2029-11-09                                2029-11-12
+async fn prune_date64() {
+    // work around for not being able to cast Date32 to Date64 automatically
+    let date = "2020-01-02"
+        .parse::<chrono::NaiveDate>()
+        .unwrap()
+        .and_time(chrono::NaiveTime::from_hms_opt(0, 0, 0).unwrap());
+    let date = ScalarValue::Date64(Some(date.timestamp_millis()));
+
+    let output = ContextWithParquet::new(Scenario::Dates, Page)
+        .await
+        .query_with_expr(col("date64").lt(lit(date)))
+        .await;
+
+    println!("{}", output.description());
+    // This should prune out groups  without error
+    assert_eq!(output.predicate_evaluation_errors(), Some(0));
+    assert_eq!(output.row_pages_pruned(), Some(15));
+    assert_eq!(output.result_rows, 1, "{}", output.description());
+}
+
+#[tokio::test]
+//                      null count  min                                       max
+// page-0                         0  -5                                        -1
+// page-1                         0  -4                                        0
+// page-2                         0  0                                         4
+// page-3                         0  5                                         9
+async fn prune_int32_lt() {
+    test_prune(
+        Scenario::Int32,
+        "SELECT * FROM t where i < 1",
+        Some(0),
+        Some(5),
+        11,
+    )
+    .await;
+    // result of sql "SELECT * FROM t where i < 1" is same as
+    // "SELECT * FROM t where -i > -1"
+    test_prune(
+        Scenario::Int32,
+        "SELECT * FROM t where -i > -1",
+        Some(0),
+        Some(5),
+        11,
+    )
+    .await;
+}
+
+#[tokio::test]
+async fn prune_int32_eq() {
+    test_prune(
+        Scenario::Int32,
+        "SELECT * FROM t where i = 1",
+        Some(0),
+        Some(15),
+        1,
+    )
+    .await;
+}
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun_and_eq() {
+    test_prune(
+        Scenario::Int32,
+        "SELECT * FROM t where abs(i) = 1  and i = 1",
+        Some(0),
+        Some(15),
+        1,
+    )
+    .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun() {
+    test_prune(
+        Scenario::Int32,
+        "SELECT * FROM t where abs(i) = 1",
+        Some(0),
+        Some(0),
+        3,
+    )
+    .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr() {
+    test_prune(
+        Scenario::Int32,
+        "SELECT * FROM t where i+1 = 1",
+        Some(0),
+        Some(0),
+        2,
+    )
+    .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr_subtract() {
+    test_prune(
+        Scenario::Int32,
+        "SELECT * FROM t where 1-i > 1",
+        Some(0),
+        Some(0),
+        9,
+    )
+    .await;
+}
+
+#[tokio::test]
+//                      null count  min                                       max
+// page-0                         0  -5.0                                      -1.0
+// page-1                         0  -4.0                                      0.0
+// page-2                         0  0.0                                       4.0
+// page-3                         0  5.0                                       9.0
+async fn prune_f64_lt() {
+    test_prune(
+        Scenario::Float64,
+        "SELECT * FROM t where f < 1",
+        Some(0),
+        Some(5),
+        11,
+    )
+    .await;
+    test_prune(
+        Scenario::Float64,
+        "SELECT * FROM t where -f > -1",
+        Some(0),
+        Some(5),
+        11,
+    )
+    .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun_and_gt() {
+    // result of sql "SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1"
+    // only use "f >= 0" to prune
+    test_prune(
+        Scenario::Float64,
+        "SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1",
+        Some(0),
+        Some(2),
+        1,
+    )
+    .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun() {
+    // result of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not supported
+    test_prune(
+        Scenario::Float64,
+        "SELECT * FROM t where abs(f-1) <= 0.000001",
+        Some(0),
+        Some(0),
+        1,
+    )
+    .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_complex_expr() {
+    // result of sql "SELECT * FROM t where f+1 > 1.1" is not supported
+    test_prune(
+        Scenario::Float64,
+        "SELECT * FROM t where f+1 > 1.1",
+        Some(0),
+        Some(0),
+        9,
+    )
+    .await;
+}
+
+#[tokio::test]
+#[ignore]

Review Comment:
   > I wonder if we have to run "type coercion / simplification" on them first?
   
   Does the row-group pruning run this "type coercion / simplification" 🤔? I think they share the same code path; I will find out soon.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to