This is an automated email from the ASF dual-hosted git repository.

houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new a3ffc52  add python binding for approx_distinct aggregate function 
(#1134)
a3ffc52 is described below

commit a3ffc529dd391ee47380f489be6b7c7c341b3b74
Author: Jiayu Liu <[email protected]>
AuthorDate: Mon Oct 18 03:09:35 2021 +0800

    add python binding for approx_distinct aggregate function (#1134)
---
 python/src/functions.rs          |  2 ++
 python/tests/test_aggregation.py | 47 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/python/src/functions.rs b/python/src/functions.rs
index cecf28d..22a5ce4 100644
--- a/python/src/functions.rs
+++ b/python/src/functions.rs
@@ -224,6 +224,7 @@ define_unary_function!(avg);
 define_unary_function!(min);
 define_unary_function!(max);
 define_unary_function!(count);
+define_unary_function!(approx_distinct);
 
 #[pyclass(name = "Volatility", module = "datafusion.functions")]
 #[derive(Clone)]
@@ -323,6 +324,7 @@ pub fn init(module: &PyModule) -> PyResult<()> {
     module.add_class::<PyVolatility>()?;
     module.add_function(wrap_pyfunction!(abs, module)?)?;
     module.add_function(wrap_pyfunction!(acos, module)?)?;
+    module.add_function(wrap_pyfunction!(approx_distinct, module)?)?;
     module.add_function(wrap_pyfunction!(array, module)?)?;
     module.add_function(wrap_pyfunction!(ascii, module)?)?;
     module.add_function(wrap_pyfunction!(asin, module)?)?;
diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py
new file mode 100644
index 0000000..f0996f9
--- /dev/null
+++ b/python/tests/test_aggregation.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow as pa
+import pytest
+from datafusion import ExecutionContext
+from datafusion import functions as f
+
+
+@pytest.fixture
+def df():
+    ctx = ExecutionContext()
+
+    # create a RecordBatch and a new DataFrame from it
+    batch = pa.RecordBatch.from_arrays(
+        [pa.array([1, 2, 3]), pa.array([4, 4, 6])],
+        names=["a", "b"],
+    )
+    return ctx.create_dataframe([[batch]])
+
+
+def test_built_in_aggregation(df):
+    col_a = f.col("a")
+    col_b = f.col("b")
+    df = df.aggregate(
+        [],
+        [f.max(col_a), f.min(col_a), f.count(col_a), f.approx_distinct(col_b)],
+    )
+    result = df.collect()[0]
+    assert result.column(0) == pa.array([3])
+    assert result.column(1) == pa.array([1])
+    assert result.column(2) == pa.array([3], type=pa.uint64())
+    assert result.column(3) == pa.array([2], type=pa.uint64())

Reply via email to