This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 3b5085e  Upgrade deps to datafusion 41 (#802)
3b5085e is described below

commit 3b5085e0c716298af7c8f025b8b9edf601344f02
Author: Michael J Ward <[email protected]>
AuthorDate: Fri Aug 23 09:43:39 2024 -0500

    Upgrade deps to datafusion 41 (#802)
    
    * update datafusion deps to point to github.com/apache/datafusion
    
    Datafusion 41 is not yet released on crates.io.
    
    * update TableProvider::scan
    
    Ref: https://github.com/apache/datafusion/pull/11516
    
    * use SessionStateBuilder
    
    The old constructor is deprecated.
    
    Ref: https://github.com/apache/datafusion/pull/11403
    
    * update AggregateFunction
    
    Upstream Changes:
    - The field name was switched from `func_name` to `func`.
    - AggregateFunctionDefinition was removed
    
    Ref: https://github.com/apache/datafusion/pull/11803
    
    * update imports in catalog
    
    Catalog API was extracted to a separate crate.
    
    Ref: https://github.com/apache/datafusion/pull/11516
    
    * use appropriate path for approx_distinct
    
    Ref: https://github.com/apache/datafusion/pull/11644
    
    * migrate AggregateExt to ExprFunctionExt
    
    Also removed `sqlparser` dependency since it's re-exported upstream.
    
    Ref: https://github.com/apache/datafusion/pull/11550
    
    * update regr_count tests for new return type
    
    Ref: https://github.com/apache/datafusion/pull/11731
    
    * migrate from function-array to functions-nested
    
    The package was renamed upstream.
    
    Ref: https://github.com/apache/datafusion/pull/11602
    
    * cargo fmt
    
    * lock datafusion deps to 41
    
    * remove todo from cargo.toml
    
    All the datafusion dependencies are re-exported, but I still need to figure 
out *why*.
---
 Cargo.lock                                | 108 ++++++++++++++++++++----------
 Cargo.toml                                |  15 ++---
 python/datafusion/tests/test_functions.py |   6 +-
 src/catalog.rs                            |   2 +-
 src/common/data_type.rs                   |  17 ++---
 src/context.rs                            |  11 +--
 src/dataset.rs                            |   4 +-
 src/expr/aggregate.rs                     |  10 ++-
 src/expr/aggregate_expr.rs                |   4 +-
 src/functions.rs                          |  44 ++++++------
 10 files changed, 126 insertions(+), 95 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e598112..bca4bf0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -680,6 +680,12 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.20"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
+
 [[package]]
 name = "crunchy"
 version = "0.2.2"
@@ -725,11 +731,12 @@ checksum = 
"7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca"
 
 [[package]]
 name = "dashmap"
-version = "5.5.3"
+version = "6.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
+checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28"
 dependencies = [
  "cfg-if",
+ "crossbeam-utils",
  "hashbrown",
  "lock_api",
  "once_cell",
@@ -738,9 +745,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "ab9d55a9cd2634818953809f75ebe5248b00dd43c3227efb2a51a2d5feaad54e"
+checksum = "e4fd4a99fc70d40ef7e52b243b4a399c3f8d353a40d5ecb200deee05e49c61bb"
 dependencies = [
  "ahash",
  "apache-avro",
@@ -754,16 +761,18 @@ dependencies = [
  "bzip2",
  "chrono",
  "dashmap",
+ "datafusion-catalog",
  "datafusion-common",
  "datafusion-common-runtime",
  "datafusion-execution",
  "datafusion-expr",
  "datafusion-functions",
  "datafusion-functions-aggregate",
- "datafusion-functions-array",
+ "datafusion-functions-nested",
  "datafusion-optimizer",
  "datafusion-physical-expr",
  "datafusion-physical-expr-common",
+ "datafusion-physical-optimizer",
  "datafusion-physical-plan",
  "datafusion-sql",
  "flate2",
@@ -792,11 +801,25 @@ dependencies = [
  "zstd 0.13.2",
 ]
 
+[[package]]
+name = "datafusion-catalog"
+version = "41.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "e13b3cfbd84c6003594ae1972314e3df303a27ce8ce755fcea3240c90f4c0529"
+dependencies = [
+ "arrow-schema",
+ "async-trait",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-plan",
+]
+
 [[package]]
 name = "datafusion-common"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "def66b642959e7f96f5d2da22e1f43d3bd35598f821e5ce351a0553e0f1b7367"
+checksum = "44fdbc877e3e40dcf88cc8f283d9f5c8851f0a3aa07fee657b1b75ac1ad49b9c"
 dependencies = [
  "ahash",
  "apache-avro",
@@ -818,18 +841,18 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common-runtime"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "f104bb9cb44c06c9badf8a0d7e0855e5f7fa5e395b887d7f835e8a9457dc1352"
+checksum = "8a7496d1f664179f6ce3a5cbef6566056ccaf3ea4aa72cc455f80e62c1dd86b1"
 dependencies = [
  "tokio",
 ]
 
 [[package]]
 name = "datafusion-execution"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "2ac0fd8b5d80bbca3fc3b6f40da4e9f6907354824ec3b18bbd83fee8cf5c3c3e"
+checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1"
 dependencies = [
  "arrow",
  "chrono",
@@ -848,9 +871,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "2103d2cc16fb11ef1fa993a6cac57ed5cb028601db4b97566c90e5fa77aa1e68"
+checksum = "1c1841c409d9518c17971d15c9bae62e629eb937e6fb6c68cd32e9186f8b30d2"
 dependencies = [
  "ahash",
  "arrow",
@@ -867,11 +890,12 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a369332afd0ef5bd565f6db2139fb9f1dfdd0afa75a7f70f000b74208d76994f"
+checksum = "a8e481cf34d2a444bd8fa09b65945f0ce83dc92df8665b761505b3d9f351bebb"
 dependencies = [
  "arrow",
+ "arrow-buffer",
  "base64 0.22.1",
  "blake2",
  "blake3",
@@ -893,9 +917,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-aggregate"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "92718db1aff70c47e5abf9fc975768530097059e5db7c7b78cd64b5e9a11fc77"
+checksum = "2b4ece19f73c02727e5e8654d79cd5652de371352c1df3c4ac3e419ecd6943fb"
 dependencies = [
  "ahash",
  "arrow",
@@ -910,10 +934,10 @@ dependencies = [
 ]
 
 [[package]]
-name = "datafusion-functions-array"
-version = "40.0.0"
+name = "datafusion-functions-nested"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "30bb80f46ff3dcf4bb4510209c2ba9b8ce1b716ac8b7bf70c6bf7dca6260c831"
+checksum = "a1474552cc824e8c9c88177d454db5781d4b66757d4aca75719306b8343a5e8d"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -928,13 +952,14 @@ dependencies = [
  "itertools 0.12.1",
  "log",
  "paste",
+ "rand",
 ]
 
 [[package]]
 name = "datafusion-optimizer"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "82f34692011bec4fdd6fc18c264bf8037b8625d801e6dd8f5111af15cb6d71d3"
+checksum = "791ff56f55608bc542d1ea7a68a64bdc86a9413f5a381d06a39fd49c2a3ab906"
 dependencies = [
  "arrow",
  "async-trait",
@@ -952,9 +977,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "45538630defedb553771434a437f7ca8f04b9b3e834344aafacecb27dc65d5e5"
+checksum = "9a223962b3041304a3e20ed07a21d5de3d88d7e4e71ca192135db6d24e3365a4"
 dependencies = [
  "ahash",
  "arrow",
@@ -982,9 +1007,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr-common"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "9d8a72b0ca908e074aaeca52c14ddf5c28d22361e9cb6bc79bb733cd6661b536"
+checksum = "db5e7d8532a1601cd916881db87a70b0a599900d23f3db2897d389032da53bc6"
 dependencies = [
  "ahash",
  "arrow",
@@ -994,11 +1019,23 @@ dependencies = [
  "rand",
 ]
 
+[[package]]
+name = "datafusion-physical-optimizer"
+version = "41.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "fdb9c78f308e050f5004671039786a925c3fee83b90004e9fcfd328d7febdcc0"
+dependencies = [
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+]
+
 [[package]]
 name = "datafusion-physical-plan"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "b504eae6107a342775e22e323e9103f7f42db593ec6103b28605b7b7b1405c4a"
+checksum = "8d1116949432eb2d30f6362707e2846d942e491052a206f2ddcb42d08aea1ffe"
 dependencies = [
  "ahash",
  "arrow",
@@ -1037,7 +1074,7 @@ dependencies = [
  "datafusion",
  "datafusion-common",
  "datafusion-expr",
- "datafusion-functions-array",
+ "datafusion-functions-nested",
  "datafusion-optimizer",
  "datafusion-sql",
  "datafusion-substrait",
@@ -1051,7 +1088,6 @@ dependencies = [
  "pyo3-build-config",
  "rand",
  "regex-syntax",
- "sqlparser",
  "syn 2.0.72",
  "tokio",
  "url",
@@ -1060,9 +1096,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-sql"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "e5db33f323f41b95ae201318ba654a9bf11113e58a51a1dff977b1a836d3d889"
+checksum = "b45d0180711165fe94015d7c4123eb3e1cf5fb60b1506453200b8d1ce666bef0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -1077,9 +1113,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-substrait"
-version = "40.0.0"
+version = "41.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "434e52fbff22e6e04e6c787f603a6aba4961a7e249a29c743c5d4f609ec2dcef"
+checksum = "bf0a0055aa98246c79f98f0d03df11f16cb7adc87818d02d4413e3f3cdadbbee"
 dependencies = [
  "arrow-buffer",
  "async-recursion",
@@ -2898,9 +2934,9 @@ checksum = 
"6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
 
 [[package]]
 name = "sqlparser"
-version = "0.47.0"
+version = "0.49.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25"
+checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e"
 dependencies = [
  "log",
  "sqlparser_derive",
diff --git a/Cargo.toml b/Cargo.toml
index 820118f..8881884 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,13 +38,13 @@ tokio = { version = "1.39", features = ["macros", "rt", 
"rt-multi-thread", "sync
 rand = "0.8"
 pyo3 = { version = "0.21", features = ["extension-module", "abi3", 
"abi3-py38"] }
 arrow = { version = "52", feature = ["pyarrow"] }
-datafusion = { version = "40.0.0", features = ["pyarrow", "avro", 
"unicode_expressions"] }
-datafusion-common = { version = "40.0.0", features = ["pyarrow"] }
-datafusion-expr = "40.0.0"
-datafusion-functions-array = "40.0.0"
-datafusion-optimizer = "40.0.0"
-datafusion-sql = "40.0.0"
-datafusion-substrait = { version = "40.0.0", optional = true }
+datafusion = { version = "41.0.0", features = ["pyarrow", "avro", 
"unicode_expressions"] }
+datafusion-common = { version = "41.0.0", features = ["pyarrow"] }
+datafusion-expr = { version = "41.0.0" }
+datafusion-functions-nested = { version = "41.0.0" }
+datafusion-optimizer = { version = "41.0.0" }
+datafusion-sql = { version = "41.0.0" }
+datafusion-substrait = { version = "41.0.0", optional = true }
 prost = "0.12" # keep in line with `datafusion-substrait`
 prost-types = "0.12" # keep in line with `datafusion-substrait`
 uuid = { version = "1.9", features = ["v4"] }
@@ -56,7 +56,6 @@ parking_lot = "0.12"
 regex-syntax = "0.8"
 syn = "2.0.68"
 url = "2"
-sqlparser = "0.47.0"
 
 [build-dependencies]
 pyo3-build-config = "0.21"
diff --git a/python/datafusion/tests/test_functions.py 
b/python/datafusion/tests/test_functions.py
index 2939123..b8ad9c0 100644
--- a/python/datafusion/tests/test_functions.py
+++ b/python/datafusion/tests/test_functions.py
@@ -808,7 +808,7 @@ def test_regr_funcs_sql(df):
 
     assert result[0].column(0) == pa.array([None], type=pa.float64())
     assert result[0].column(1) == pa.array([None], type=pa.float64())
-    assert result[0].column(2) == pa.array([1], type=pa.float64())
+    assert result[0].column(2) == pa.array([1], type=pa.uint64())
     assert result[0].column(3) == pa.array([None], type=pa.float64())
     assert result[0].column(4) == pa.array([1], type=pa.float64())
     assert result[0].column(5) == pa.array([1], type=pa.float64())
@@ -840,7 +840,7 @@ def test_regr_funcs_sql_2():
     # Assertions for SQL results
     assert result_sql[0].column(0) == pa.array([2], type=pa.float64())
     assert result_sql[0].column(1) == pa.array([0], type=pa.float64())
-    assert result_sql[0].column(2) == pa.array([3], type=pa.float64()) # todo: 
i would not expect this to be float
+    assert result_sql[0].column(2) == pa.array([3], type=pa.uint64())
     assert result_sql[0].column(3) == pa.array([1], type=pa.float64())
     assert result_sql[0].column(4) == pa.array([2], type=pa.float64())
     assert result_sql[0].column(5) == pa.array([4], type=pa.float64())
@@ -852,7 +852,7 @@ def test_regr_funcs_sql_2():
 @pytest.mark.parametrize("func, expected", [
     pytest.param(f.regr_slope, pa.array([2], type=pa.float64()), 
id="regr_slope"),
     pytest.param(f.regr_intercept, pa.array([0], type=pa.float64()), 
id="regr_intercept"),
-    pytest.param(f.regr_count, pa.array([3], type=pa.float64()), 
id="regr_count"), # TODO: I would expect this to return an int array
+    pytest.param(f.regr_count, pa.array([3], type=pa.uint64()), 
id="regr_count"),
     pytest.param(f.regr_r2, pa.array([1], type=pa.float64()), id="regr_r2"),
     pytest.param(f.regr_avgx, pa.array([2], type=pa.float64()), 
id="regr_avgx"),
     pytest.param(f.regr_avgy, pa.array([4], type=pa.float64()), 
id="regr_avgy"),
diff --git a/src/catalog.rs b/src/catalog.rs
index 49fe140..1ce66a4 100644
--- a/src/catalog.rs
+++ b/src/catalog.rs
@@ -25,7 +25,7 @@ use crate::errors::DataFusionError;
 use crate::utils::wait_for_future;
 use datafusion::{
     arrow::pyarrow::ToPyArrow,
-    catalog::{schema::SchemaProvider, CatalogProvider},
+    catalog::{CatalogProvider, SchemaProvider},
     datasource::{TableProvider, TableType},
 };
 
diff --git a/src/common/data_type.rs b/src/common/data_type.rs
index 469bb78..21b085c 100644
--- a/src/common/data_type.rs
+++ b/src/common/data_type.rs
@@ -18,6 +18,7 @@
 use datafusion::arrow::array::Array;
 use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
 use datafusion_common::{DataFusionError, ScalarValue};
+use datafusion_expr::sqlparser::ast::NullTreatment as DFNullTreatment;
 use pyo3::{exceptions::PyValueError, prelude::*};
 
 use crate::errors::py_datafusion_err;
@@ -775,20 +776,20 @@ pub enum NullTreatment {
     RESPECT_NULLS,
 }
 
-impl From<NullTreatment> for sqlparser::ast::NullTreatment {
-    fn from(null_treatment: NullTreatment) -> sqlparser::ast::NullTreatment {
+impl From<NullTreatment> for DFNullTreatment {
+    fn from(null_treatment: NullTreatment) -> DFNullTreatment {
         match null_treatment {
-            NullTreatment::IGNORE_NULLS => 
sqlparser::ast::NullTreatment::IgnoreNulls,
-            NullTreatment::RESPECT_NULLS => 
sqlparser::ast::NullTreatment::RespectNulls,
+            NullTreatment::IGNORE_NULLS => DFNullTreatment::IgnoreNulls,
+            NullTreatment::RESPECT_NULLS => DFNullTreatment::RespectNulls,
         }
     }
 }
 
-impl From<sqlparser::ast::NullTreatment> for NullTreatment {
-    fn from(null_treatment: sqlparser::ast::NullTreatment) -> NullTreatment {
+impl From<DFNullTreatment> for NullTreatment {
+    fn from(null_treatment: DFNullTreatment) -> NullTreatment {
         match null_treatment {
-            sqlparser::ast::NullTreatment::IgnoreNulls => 
NullTreatment::IGNORE_NULLS,
-            sqlparser::ast::NullTreatment::RespectNulls => 
NullTreatment::RESPECT_NULLS,
+            DFNullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS,
+            DFNullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS,
         }
     }
 }
diff --git a/src/context.rs b/src/context.rs
index d7890e3..a43599c 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -20,6 +20,7 @@ use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;
 
+use datafusion::execution::session_state::SessionStateBuilder;
 use object_store::ObjectStore;
 use url::Url;
 use uuid::Uuid;
@@ -49,9 +50,7 @@ use datafusion::datasource::listing::{
 };
 use datafusion::datasource::MemTable;
 use datafusion::datasource::TableProvider;
-use datafusion::execution::context::{
-    SQLOptions, SessionConfig, SessionContext, SessionState, TaskContext,
-};
+use datafusion::execution::context::{SQLOptions, SessionConfig, 
SessionContext, TaskContext};
 use datafusion::execution::disk_manager::DiskManagerConfig;
 use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, 
UnboundedMemoryPool};
 use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
@@ -281,7 +280,11 @@ impl PySessionContext {
             RuntimeConfig::default()
         };
         let runtime = Arc::new(RuntimeEnv::new(runtime_config)?);
-        let session_state = SessionState::new_with_config_rt(config, runtime);
+        let session_state = SessionStateBuilder::new()
+            .with_config(config)
+            .with_runtime_env(runtime)
+            .with_default_features()
+            .build();
         Ok(PySessionContext {
             ctx: SessionContext::new_with_state(session_state),
         })
diff --git a/src/dataset.rs b/src/dataset.rs
index 724b4af..b570416 100644
--- a/src/dataset.rs
+++ b/src/dataset.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use datafusion::catalog::Session;
 use pyo3::exceptions::PyValueError;
 /// Implements a Datafusion TableProvider that delegates to a PyArrow Dataset
 /// This allows us to use PyArrow Datasets as Datafusion tables while pushing 
down projections and filters
@@ -30,7 +31,6 @@ use datafusion::arrow::datatypes::SchemaRef;
 use datafusion::arrow::pyarrow::PyArrowType;
 use datafusion::datasource::{TableProvider, TableType};
 use datafusion::error::{DataFusionError, Result as DFResult};
-use datafusion::execution::context::SessionState;
 use datafusion::logical_expr::TableProviderFilterPushDown;
 use datafusion::physical_plan::ExecutionPlan;
 use datafusion_expr::Expr;
@@ -98,7 +98,7 @@ impl TableProvider for Dataset {
     /// parallelized or distributed.
     async fn scan(
         &self,
-        _ctx: &SessionState,
+        _ctx: &dyn Session,
         projection: Option<&Vec<usize>>,
         filters: &[Expr],
         // limit can be used to reduce the amount scanned
diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs
index 626d92c..e3d1bb1 100644
--- a/src/expr/aggregate.rs
+++ b/src/expr/aggregate.rs
@@ -126,9 +126,9 @@ impl PyAggregate {
         match expr {
             // TODO: This Alias logic seems to be returning some strange 
results that we should investigate
             Expr::Alias(Alias { expr, .. }) => 
self._aggregation_arguments(expr.as_ref()),
-            Expr::AggregateFunction(AggregateFunction {
-                func_def: _, args, ..
-            }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()),
+            Expr::AggregateFunction(AggregateFunction { func: _, args, .. }) 
=> {
+                Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect())
+            }
             _ => Err(py_type_err(
                 "Encountered a non Aggregate type in aggregation_arguments",
             )),
@@ -138,9 +138,7 @@ impl PyAggregate {
     fn _agg_func_name(expr: &Expr) -> PyResult<String> {
         match expr {
             Expr::Alias(Alias { expr, .. }) => 
Self::_agg_func_name(expr.as_ref()),
-            Expr::AggregateFunction(AggregateFunction { func_def, .. }) => {
-                Ok(func_def.name().to_owned())
-            }
+            Expr::AggregateFunction(AggregateFunction { func, .. }) => 
Ok(func.name().to_owned()),
             _ => Err(py_type_err(
                 "Encountered a non Aggregate type in agg_func_name",
             )),
diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs
index 04ec29a..15097e0 100644
--- a/src/expr/aggregate_expr.rs
+++ b/src/expr/aggregate_expr.rs
@@ -41,7 +41,7 @@ impl From<AggregateFunction> for PyAggregateFunction {
 impl Display for PyAggregateFunction {
     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
         let args: Vec<String> = self.aggr.args.iter().map(|expr| 
expr.to_string()).collect();
-        write!(f, "{}({})", self.aggr.func_def.name(), args.join(", "))
+        write!(f, "{}({})", self.aggr.func.name(), args.join(", "))
     }
 }
 
@@ -49,7 +49,7 @@ impl Display for PyAggregateFunction {
 impl PyAggregateFunction {
     /// Get the aggregate type, such as "MIN", or "MAX"
     fn aggregate_type(&self) -> String {
-        self.aggr.func_def.name().to_string()
+        self.aggr.func.name().to_string()
     }
 
     /// is this a distinct aggregate such as `COUNT(DISTINCT expr)`
diff --git a/src/functions.rs b/src/functions.rs
index f8f4781..c53d4ad 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use datafusion::functions_aggregate::all_default_aggregate_functions;
-use datafusion_expr::AggregateExt;
+use datafusion_expr::ExprFunctionExt;
 use pyo3::{prelude::*, wrap_pyfunction};
 
 use crate::common::data_type::NullTreatment;
@@ -30,16 +30,15 @@ use datafusion::functions;
 use datafusion::functions_aggregate;
 use datafusion_common::{Column, ScalarValue, TableReference};
 use datafusion_expr::expr::Alias;
+use datafusion_expr::sqlparser::ast::NullTreatment as DFNullTreatment;
 use datafusion_expr::{
-    expr::{
-        find_df_window_func, AggregateFunction, AggregateFunctionDefinition, 
Sort, WindowFunction,
-    },
+    expr::{find_df_window_func, AggregateFunction, Sort, WindowFunction},
     lit, Expr, WindowFunctionDefinition,
 };
 
 #[pyfunction]
 pub fn approx_distinct(expression: PyExpr) -> PyExpr {
-    
functions_aggregate::expr_fn::approx_distinct::approx_distinct(expression.expr).into()
+    functions_aggregate::expr_fn::approx_distinct(expression.expr).into()
 }
 
 #[pyfunction]
@@ -342,9 +341,8 @@ pub fn first_value(
         builder = builder.filter(filter.expr);
     }
 
-    if let Some(null_treatment) = null_treatment {
-        builder = builder.null_treatment(null_treatment.into())
-    }
+    // would be nice if all the options builder methods accepted Option<T> ...
+    builder = 
builder.null_treatment(null_treatment.map(DFNullTreatment::from));
 
     Ok(builder.build()?.into())
 }
@@ -373,9 +371,7 @@ pub fn last_value(
         builder = builder.filter(filter.expr);
     }
 
-    if let Some(null_treatment) = null_treatment {
-        builder = builder.null_treatment(null_treatment.into())
-    }
+    builder = 
builder.null_treatment(null_treatment.map(DFNullTreatment::from));
 
     Ok(builder.build()?.into())
 }
@@ -392,14 +388,14 @@ fn in_list(expr: PyExpr, value: Vec<PyExpr>, negated: 
bool) -> PyExpr {
 
 #[pyfunction]
 fn make_array(exprs: Vec<PyExpr>) -> PyExpr {
-    datafusion_functions_array::expr_fn::make_array(exprs.into_iter().map(|x| 
x.into()).collect())
+    datafusion_functions_nested::expr_fn::make_array(exprs.into_iter().map(|x| 
x.into()).collect())
         .into()
 }
 
 #[pyfunction]
 fn array_concat(exprs: Vec<PyExpr>) -> PyExpr {
     let exprs = exprs.into_iter().map(|x| x.into()).collect();
-    datafusion_functions_array::expr_fn::array_concat(exprs).into()
+    datafusion_functions_nested::expr_fn::array_concat(exprs).into()
 }
 
 #[pyfunction]
@@ -411,12 +407,12 @@ fn array_cat(exprs: Vec<PyExpr>) -> PyExpr {
 fn array_position(array: PyExpr, element: PyExpr, index: Option<i64>) -> 
PyExpr {
     let index = ScalarValue::Int64(index);
     let index = Expr::Literal(index);
-    datafusion_functions_array::expr_fn::array_position(array.into(), 
element.into(), index).into()
+    datafusion_functions_nested::expr_fn::array_position(array.into(), 
element.into(), index).into()
 }
 
 #[pyfunction]
 fn array_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: 
Option<PyExpr>) -> PyExpr {
-    datafusion_functions_array::expr_fn::array_slice(
+    datafusion_functions_nested::expr_fn::array_slice(
         array.into(),
         begin.into(),
         end.into(),
@@ -638,18 +634,16 @@ fn window(
 }
 
 macro_rules! aggregate_function {
-    ($NAME: ident, $FUNC: ident) => {
+    ($NAME: ident, $FUNC: path) => {
         aggregate_function!($NAME, $FUNC, stringify!($NAME));
     };
-    ($NAME: ident, $FUNC: ident, $DOC: expr) => {
+    ($NAME: ident, $FUNC: path, $DOC: expr) => {
         #[doc = $DOC]
         #[pyfunction]
         #[pyo3(signature = (*args, distinct=false))]
         fn $NAME(args: Vec<PyExpr>, distinct: bool) -> PyExpr {
             let expr = 
datafusion_expr::Expr::AggregateFunction(AggregateFunction {
-                func_def: AggregateFunctionDefinition::BuiltIn(
-                    
datafusion_expr::aggregate_function::AggregateFunction::$FUNC,
-                ),
+                func: $FUNC(),
                 args: args.into_iter().map(|e| e.into()).collect(),
                 distinct,
                 filter: None,
@@ -701,7 +695,7 @@ macro_rules! expr_fn_vec {
     };
 }
 
-/// Generates a [pyo3] wrapper for [datafusion_functions_array::expr_fn]
+/// Generates a [pyo3] wrapper for [datafusion_functions_nested::expr_fn]
 ///
 /// These functions have explicit named arguments.
 macro_rules! array_fn {
@@ -718,7 +712,7 @@ macro_rules! array_fn {
         #[doc = $DOC]
         #[pyfunction]
         fn $FUNC($($arg: PyExpr),*) -> PyExpr {
-            datafusion_functions_array::expr_fn::$FUNC($($arg.into()),*).into()
+            
datafusion_functions_nested::expr_fn::$FUNC($($arg.into()),*).into()
         }
     };
 }
@@ -884,9 +878,9 @@ array_fn!(array_resize, array size value);
 array_fn!(flatten, array);
 array_fn!(range, start stop step);
 
-aggregate_function!(array_agg, ArrayAgg);
-aggregate_function!(max, Max);
-aggregate_function!(min, Min);
+aggregate_function!(array_agg, functions_aggregate::array_agg::array_agg_udaf);
+aggregate_function!(max, functions_aggregate::min_max::max_udaf);
+aggregate_function!(min, functions_aggregate::min_max::min_udaf);
 
 pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(abs))?;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to