This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 56f8e3527b feat: expose centroids in approx_percentile_cont fluent api 
(#11878)
56f8e3527b is described below

commit 56f8e3527b78282511e51701223971f04244c8cf
Author: Michael J Ward <[email protected]>
AuthorDate: Thu Aug 8 15:41:27 2024 -0500

    feat: expose centroids in approx_percentile_cont fluent api (#11878)
    
    * feat: expose centroids in approx_percentile_count fluent api
    
    Closes https://github.com/apache/datafusion/issues/11877
    
    * avoid repeated import prefix in function signature
    
    * update test_fn_approx_percentile_cont so that adjusting centroids changes 
the result
---
 .../core/tests/dataframe/dataframe_functions.rs    | 19 +++++++++++++++++--
 .../src/approx_percentile_cont.rs                  | 22 +++++++++++++++-------
 .../proto/tests/cases/roundtrip_logical_plan.rs    |  3 ++-
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs 
b/datafusion/core/tests/dataframe/dataframe_functions.rs
index 7a0e9888a6..1bd90fce83 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -360,7 +360,7 @@ async fn test_fn_approx_median() -> Result<()> {
 
 #[tokio::test]
 async fn test_fn_approx_percentile_cont() -> Result<()> {
-    let expr = approx_percentile_cont(col("b"), lit(0.5));
+    let expr = approx_percentile_cont(col("b"), lit(0.5), None);
 
     let expected = [
         "+---------------------------------------------+",
@@ -381,7 +381,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
         None::<&str>,
         "arg_2".to_string(),
     ));
-    let expr = approx_percentile_cont(col("b"), alias_expr);
+    let expr = approx_percentile_cont(col("b"), alias_expr, None);
     let df = create_test_table().await?;
     let expected = [
         "+--------------------------------------+",
@@ -394,6 +394,21 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
 
     assert_batches_eq!(expected, &batches);
 
+    // with number of centroids set
+    let expr = approx_percentile_cont(col("b"), lit(0.5), Some(lit(2)));
+    let expected = [
+        "+------------------------------------------------------+",
+        "| approx_percentile_cont(test.b,Float64(0.5),Int32(2)) |",
+        "+------------------------------------------------------+",
+        "| 30                                                   |",
+        "+------------------------------------------------------+",
+    ];
+
+    let df = create_test_table().await?;
+    let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
+
+    assert_batches_eq!(expected, &batches);
+
     Ok(())
 }
 
diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs 
b/datafusion/functions-aggregate/src/approx_percentile_cont.rs
index af2a26fd05..ffa623c13b 100644
--- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs
+++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs
@@ -46,13 +46,21 @@ use datafusion_physical_expr_common::aggregate::tdigest::{
 };
 use 
datafusion_physical_expr_common::utils::limited_convert_logical_expr_to_physical_expr_with_dfschema;
 
-make_udaf_expr_and_func!(
-    ApproxPercentileCont,
-    approx_percentile_cont,
-    expression percentile,
-    "Computes the approximate percentile continuous of a set of numbers",
-    approx_percentile_cont_udaf
-);
+create_func!(ApproxPercentileCont, approx_percentile_cont_udaf);
+
+/// Computes the approximate percentile continuous of a set of numbers
+pub fn approx_percentile_cont(
+    expression: Expr,
+    percentile: Expr,
+    centroids: Option<Expr>,
+) -> Expr {
+    let args = if let Some(centroids) = centroids {
+        vec![expression, percentile, centroids]
+    } else {
+        vec![expression, percentile]
+    };
+    approx_percentile_cont_udaf().call(args)
+}
 
 pub struct ApproxPercentileCont {
     signature: Signature,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs 
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index e5c2264184..a18fa03b2d 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -887,7 +887,8 @@ async fn roundtrip_expr_api() -> Result<()> {
         stddev_pop(lit(2.2)),
         approx_distinct(lit(2)),
         approx_median(lit(2)),
-        approx_percentile_cont(lit(2), lit(0.5)),
+        approx_percentile_cont(lit(2), lit(0.5), None),
+        approx_percentile_cont(lit(2), lit(0.5), Some(lit(50))),
         approx_percentile_cont_with_weight(lit(2), lit(1), lit(0.5)),
         grouping(lit(1)),
         bit_and(lit(2)),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to