Ted-Jiang commented on code in PR #4255:
URL: https://github.com/apache/arrow-datafusion/pull/4255#discussion_r1026585801
##########
datafusion/core/tests/parquet/page_pruning.rs:
##########
@@ -204,3 +222,466 @@ async fn page_index_filter_multi_col() {
let batch = results.next().await.unwrap().unwrap();
assert_eq!(batch.num_rows(), 7300);
}
+
+async fn test_prune(
+ case_data_type: Scenario,
+ sql: &str,
+ expected_errors: Option<usize>,
+ expected_row_pages_pruned: Option<usize>,
+ expected_results: usize,
+) {
+ let output = ContextWithParquet::new(case_data_type, Page)
+ .await
+ .query(sql)
+ .await;
+
+ println!("{}", output.description());
+ assert_eq!(output.predicate_evaluation_errors(), expected_errors);
+ assert_eq!(output.row_pages_pruned(), expected_row_pages_pruned);
+ assert_eq!(
+ output.result_rows,
+ expected_results,
+ "{}",
+ output.description()
+ );
+}
+
+#[tokio::test]
+//          null count  min                            max
+// page-0   1           2020-01-01T01:01:01.000000000  2020-01-02T01:01:01.000000000
+// page-1   1           2020-01-01T01:01:11.000000000  2020-01-02T01:01:11.000000000
+// page-2   1           2020-01-01T01:11:01.000000000  2020-01-02T01:11:01.000000000
+// page-3   1           2020-01-11T01:01:01.000000000  2020-01-12T01:01:01.000000000
+async fn prune_timestamps_nanos() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+//          null count  min                         max
+// page-0   1           2020-01-01T01:01:01.000000  2020-01-02T01:01:01.000000
+// page-1   1           2020-01-01T01:01:11.000000  2020-01-02T01:01:11.000000
+// page-2   1           2020-01-01T01:11:01.000000  2020-01-02T01:11:01.000000
+// page-3   1           2020-01-11T01:01:01.000000  2020-01-12T01:01:01.000000
+async fn prune_timestamps_micros() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+//          null count  min                      max
+// page-0   1           2020-01-01T01:01:01.000  2020-01-02T01:01:01.000
+// page-1   1           2020-01-01T01:01:11.000  2020-01-02T01:01:11.000
+// page-2   1           2020-01-01T01:11:01.000  2020-01-02T01:11:01.000
+// page-3   1           2020-01-11T01:01:01.000  2020-01-12T01:01:01.000
+async fn prune_timestamps_millis() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where millis < to_timestamp_millis('2020-01-02 01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+//          null count  min         max
+// page-0   1           1577840461  1577926861
+// page-1   1           1577840471  1577926871
+// page-2   1           1577841061  1577927461
+// page-3   1           1578704461  1578790861
+
+async fn prune_timestamps_seconds() {
+ test_prune(
+ Scenario::Timestamps,
+ "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02 01:01:11Z')",
+ Some(0),
+ Some(5),
+ 10,
+ )
+ .await;
+}
+
+#[tokio::test]
+//          null count  min         max
+// page-0   1           2020-01-01  2020-01-04
+// page-1   1           2020-01-11  2020-01-14
+// page-2   1           2020-10-27  2020-10-30
+// page-3   1           2029-11-09  2029-11-12
+async fn prune_date32() {
+ test_prune(
+ Scenario::Dates,
+ "SELECT * FROM t where date32 < cast('2020-01-02' as date)",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+//          null count  min         max
+// page-0   1           2020-01-01  2020-01-04
+// page-1   1           2020-01-11  2020-01-14
+// page-2   1           2020-10-27  2020-10-30
+// page-3   1           2029-11-09  2029-11-12
+async fn prune_date64() {
+ // work around for not being able to cast Date32 to Date64 automatically
+ let date = "2020-01-02"
+ .parse::<chrono::NaiveDate>()
+ .unwrap()
+ .and_time(chrono::NaiveTime::from_hms_opt(0, 0, 0).unwrap());
+ let date = ScalarValue::Date64(Some(date.timestamp_millis()));
+
+ let output = ContextWithParquet::new(Scenario::Dates, Page)
+ .await
+ .query_with_expr(col("date64").lt(lit(date)))
+ .await;
+
+ println!("{}", output.description());
+ // This should prune out groups without error
+ assert_eq!(output.predicate_evaluation_errors(), Some(0));
+ assert_eq!(output.row_pages_pruned(), Some(15));
+ assert_eq!(output.result_rows, 1, "{}", output.description());
+}
+
+#[tokio::test]
+//          null count  min  max
+// page-0   0           -5   -1
+// page-1   0           -4   0
+// page-2   0           0    4
+// page-3   0           5    9
+async fn prune_int32_lt() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i < 1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+ // result of sql "SELECT * FROM t where i < 1" is same as
+ // "SELECT * FROM t where -i > -1"
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where -i > -1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_int32_eq() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i = 1",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun_and_eq() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where abs(i) = 1 and i = 1",
+ Some(0),
+ Some(15),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_scalar_fun() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where abs(i) = 1",
+ Some(0),
+ Some(0),
+ 3,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where i+1 = 1",
+ Some(0),
+ Some(0),
+ 2,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_int32_complex_expr_subtract() {
+ test_prune(
+ Scenario::Int32,
+ "SELECT * FROM t where 1-i > 1",
+ Some(0),
+ Some(0),
+ 9,
+ )
+ .await;
+}
+
+#[tokio::test]
+//          null count  min   max
+// page-0   0           -5.0  -1.0
+// page-1   0           -4.0  0.0
+// page-2   0           0.0   4.0
+// page-3   0           5.0   9.0
+async fn prune_f64_lt() {
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where f < 1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where -f > -1",
+ Some(0),
+ Some(5),
+ 11,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun_and_gt() {
+ // result of sql "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1"
+ // only use "f >= 0" to prune
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1",
+ Some(0),
+ Some(2),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_scalar_fun() {
+ // result of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not supported
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where abs(f-1) <= 0.000001",
+ Some(0),
+ Some(0),
+ 1,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
+async fn prune_f64_complex_expr() {
+ // result of sql "SELECT * FROM t where f+1 > 1.1"" is not supported
+ test_prune(
+ Scenario::Float64,
+ "SELECT * FROM t where f+1 > 1.1",
+ Some(0),
+ Some(0),
+ 9,
+ )
+ .await;
+}
+
+#[tokio::test]
+#[ignore]
Review Comment:
All test case with expr fail 😭
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]