This is an automated email from the ASF dual-hosted git repository. timsaucer pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push: new 13910780 feat: upgrade df48 dependency (#1143) 13910780 is described below commit 13910780216893e7602d26632411c8ae06869485 Author: Tim Saucer <timsau...@gmail.com> AuthorDate: Mon Jun 16 17:18:55 2025 -0400 feat: upgrade df48 dependency (#1143) * Upgrade to DF 48 * Update unit test * Resolve clippy warnings * Update wrapper test to look for __repr__ special function * Add __repr__ where missing * Error in return of __repr__ * Remove patch now that DF48 is released * Expose lit_with_metadata and add unit test --- Cargo.lock | 184 ++++++++++++++++++---------------- Cargo.toml | 10 +- python/datafusion/__init__.py | 19 ++++ python/datafusion/catalog.py | 12 +++ python/datafusion/context.py | 4 + python/datafusion/expr.py | 18 ++++ python/datafusion/user_defined.py | 12 +++ python/tests/test_expr.py | 60 ++++++++++- python/tests/test_wrapper_coverage.py | 7 +- src/context.rs | 47 +++++++-- src/expr.rs | 34 ++++--- src/expr/literal.rs | 16 ++- src/expr/window.rs | 29 ++---- src/functions.rs | 6 +- src/pyarrow_filter_expression.rs | 4 +- src/udwf.rs | 8 +- 16 files changed, 325 insertions(+), 145 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 39489ed9..112167cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -359,6 +359,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" dependencies = [ "bitflags", + "serde", + "serde_json", ] [[package]] @@ -859,9 +861,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" +checksum = "cc6cb8c2c81eada072059983657d6c9caf3fddefc43b4a65551d243253254a96" dependencies = [ "arrow", "arrow-ipc", @@ -887,7 +889,6 @@ dependencies = [ "datafusion-functions-nested", "datafusion-functions-table", 
"datafusion-functions-window", - "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -902,7 +903,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.8.5", + "rand 0.9.1", "regex", "sqlparser", "tempfile", @@ -915,9 +916,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" +checksum = "b7be8d1b627843af62e447396db08fe1372d882c0eb8d0ea655fd1fbc33120ee" dependencies = [ "arrow", "async-trait", @@ -941,9 +942,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +checksum = "38ab16c5ae43f65ee525fc493ceffbc41f40dee38b01f643dfcfc12959e92038" dependencies = [ "arrow", "async-trait", @@ -964,9 +965,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" +checksum = "d3d56b2ac9f476b93ca82e4ef5fb00769c8a3f248d12b4965af7e27635fa7e12" dependencies = [ "ahash", "apache-avro", @@ -989,9 +990,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" +checksum = "16015071202d6133bc84d72756176467e3e46029f3ce9ad2cb788f9b1ff139b2" dependencies = [ "futures", "log", @@ -1000,9 +1001,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "47.0.0" +version = "48.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +checksum = "b77523c95c89d2a7eb99df14ed31390e04ab29b43ff793e562bdc1716b07e17b" dependencies = [ "arrow", "async-compression", @@ -1025,7 +1026,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand 0.8.5", + "rand 0.9.1", "tempfile", "tokio", "tokio-util", @@ -1036,9 +1037,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4ea5111aab9d3f2a8bff570343cccb03ce4c203875ef5a566b7d6f1eb72559e" +checksum = "1371cb4ef13c2e3a15685d37a07398cf13e3b0a85e705024b769fc4c511f5fef" dependencies = [ "apache-avro", "arrow", @@ -1061,9 +1062,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +checksum = "40d25c5e2c0ebe8434beeea997b8e88d55b3ccc0d19344293f2373f65bc524fc" dependencies = [ "arrow", "async-trait", @@ -1086,9 +1087,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +checksum = "3dc6959e1155741ab35369e1dc7673ba30fc45ed568fad34c01b7cb1daeb4d4c" dependencies = [ "arrow", "async-trait", @@ -1111,9 +1112,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +checksum = "b7a6afdfe358d70f4237f60eaef26ae5a1ce7cb2c469d02d5fc6c7fd5d84e58b" dependencies = [ "arrow", "async-trait", @@ -1136,21 +1137,21 @@ 
dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.8.5", + "rand 0.9.1", "tokio", ] [[package]] name = "datafusion-doc" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" +checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" [[package]] name = "datafusion-execution" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" +checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" dependencies = [ "arrow", "dashmap", @@ -1160,16 +1161,16 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.8.5", + "rand 0.9.1", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" +checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" dependencies = [ "arrow", "chrono", @@ -1188,9 +1189,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" +checksum = "51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" dependencies = [ "arrow", "datafusion-common", @@ -1201,9 +1202,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" +checksum = "980cca31de37f5dadf7ea18e4ffc2b6833611f45bed5ef9de0831d2abb50f1ef" dependencies = [ "abi_stable", "arrow", @@ -1211,7 
+1212,9 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-functions-aggregate-common", "datafusion-proto", + "datafusion-proto-common", "futures", "log", "prost", @@ -1221,9 +1224,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" +checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" dependencies = [ "arrow", "arrow-buffer", @@ -1241,7 +1244,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", - "rand 0.8.5", + "rand 0.9.1", "regex", "sha2", "unicode-segmentation", @@ -1250,9 +1253,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" +checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" dependencies = [ "ahash", "arrow", @@ -1271,9 +1274,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" +checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" dependencies = [ "ahash", "arrow", @@ -1284,9 +1287,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" +checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" dependencies = [ "arrow", "arrow-ord", @@ -1305,9 +1308,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = 
"47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" +checksum = "de2fc6c2946da5cab8364fb28b5cac3115f0f3a87960b235ed031c3f7e2e639b" dependencies = [ "arrow", "async-trait", @@ -1321,10 +1324,11 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" +checksum = "3e5746548a8544870a119f556543adcd88fe0ba6b93723fe78ad0439e0fbb8b4" dependencies = [ + "arrow", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -1338,9 +1342,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" +checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1348,9 +1352,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" +checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" dependencies = [ "datafusion-expr", "quote", @@ -1359,9 +1363,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" +checksum = "03cfaacf06445dc3bbc1e901242d2a44f2cae99a744f49f3fefddcee46240058" dependencies = [ "arrow", "chrono", @@ -1378,9 +1382,9 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" +checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" dependencies = [ "ahash", "arrow", @@ -1395,14 +1399,14 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 0.8.2", ] [[package]] name = "datafusion-physical-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" +checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" dependencies = [ "ahash", "arrow", @@ -1414,9 +1418,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" +checksum = "4371cc4ad33978cc2a8be93bd54a232d3f2857b50401a14631c0705f3f910aae" dependencies = [ "arrow", "datafusion-common", @@ -1433,9 +1437,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" +checksum = "dc47bc33025757a5c11f2cd094c5b6b5ed87f46fa33c023e6fdfa25fcbfade23" dependencies = [ "ahash", "arrow", @@ -1463,9 +1467,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" +checksum = "d8f5d9acd7d96e3bf2a7bb04818373cab6e51de0356e3694b94905fee7b4e8b6" dependencies = [ "arrow", "chrono", @@ -1479,9 +1483,9 @@ dependencies = [ [[package]] 
name = "datafusion-proto-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" +checksum = "09ecb5ec152c4353b60f7a5635489834391f7a291d2b39a4820cd469e318b78e" dependencies = [ "arrow", "datafusion-common", @@ -1513,9 +1517,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +checksum = "d7485da32283985d6b45bd7d13a65169dcbe8c869e25d01b2cfbc425254b4b49" dependencies = [ "arrow", "async-trait", @@ -1537,9 +1541,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" +checksum = "a466b15632befddfeac68c125f0260f569ff315c6831538cbb40db754134e0df" dependencies = [ "arrow", "bigdecimal", @@ -1554,9 +1558,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061efc0937f0ce3abb37ed0d56cfa01dd0e654b90e408656d05e846c8b7599fe" +checksum = "f2f3973b1a4f6e9ee7fd99a22d58e1c06e6723a28dc911a60df575974c8339aa" dependencies = [ "async-recursion", "async-trait", @@ -2717,6 +2721,18 @@ dependencies = [ "indexmap", ] +[[package]] +name = "petgraph" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.3", + "indexmap", + "serde", +] + [[package]] name = "phf" version = "0.11.3" @@ -2837,7 +2853,7 @@ dependencies = [ "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.7.1", 
"prettyplease", "prost", "prost-types", @@ -3661,9 +3677,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.55.1" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "048fe52a3664881ccdfdc9bdb0f4e8805f3444ee64abf299d365c54f6a2ffabb" +checksum = "13de2e20128f2a018dab1cfa30be83ae069219a65968c6f89df66ad124de2397" dependencies = [ "heck", "pbjson", @@ -4016,9 +4032,9 @@ checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "typify" -version = "0.3.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e03ba3643450cfd95a1aca2e1938fef63c1c1994489337998aff4ad771f21ef8" +checksum = "6c6c647a34e851cf0260ccc14687f17cdcb8302ff1a8a687a24b97ca0f82406f" dependencies = [ "typify-impl", "typify-macro", @@ -4026,9 +4042,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.3.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bce48219a2f3154aaa2c56cbf027728b24a3c8fe0a47ed6399781de2b3f3eeaf" +checksum = "741b7f1e2e1338c0bee5ad5a7d3a9bbd4e24c33765c08b7691810e68d879365d" dependencies = [ "heck", "log", @@ -4046,9 +4062,9 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.3.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b5780d745920ed73c5b7447496a9b5c42ed2681a9b70859377aec423ecf02b" +checksum = "7560adf816a1e8dad7c63d8845ef6e31e673e39eab310d225636779230cbedeb" dependencies = [ "proc-macro2", "quote", @@ -4116,9 +4132,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ 
"getrandom 0.3.3", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index 8107d76d..4135e64e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,11 +37,11 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.45", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"]} -arrow = { version = "55.0.0", features = ["pyarrow"] } -datafusion = { version = "47.0.0", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "47.0.0", optional = true } -datafusion-proto = { version = "47.0.0" } -datafusion-ffi = { version = "47.0.0" } +arrow = { version = "55.1.0", features = ["pyarrow"] } +datafusion = { version = "48.0.0", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "48.0.0", optional = true } +datafusion-proto = { version = "48.0.0" } +datafusion-ffi = { version = "48.0.0" } prost = "0.13.1" # keep in line with `datafusion-substrait` uuid = { version = "1.16", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index c3468eb4..4f770025 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -21,6 +21,10 @@ This is a Python library that binds to Apache Arrow in-memory query engine DataF See https://datafusion.apache.org/python for more information. """ +from __future__ import annotations + +from typing import Any + try: import importlib.metadata as importlib_metadata except ImportError: @@ -130,3 +134,18 @@ def str_lit(value): def lit(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) + + +def literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: + """Creates a new expression representing a scalar value with metadata. 
+ + Args: + value: A valid PyArrow scalar value or easily castable to one. + metadata: Metadata to attach to the expression. + """ + return Expr.literal_with_metadata(value, metadata) + + +def lit_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: + """Alias for literal_with_metadata.""" + return literal_with_metadata(value, metadata) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 6c3f188c..67ab3ead 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -34,6 +34,10 @@ class Catalog: """This constructor is not typically called by the end user.""" self.catalog = catalog + def __repr__(self) -> str: + """Print a string representation of the catalog.""" + return self.catalog.__repr__() + def names(self) -> list[str]: """Returns the list of databases in this catalog.""" return self.catalog.names() @@ -50,6 +54,10 @@ class Database: """This constructor is not typically called by the end user.""" self.db = db + def __repr__(self) -> str: + """Print a string representation of the database.""" + return self.db.__repr__() + def names(self) -> set[str]: """Returns the list of all tables in this database.""" return self.db.names() @@ -66,6 +74,10 @@ class Table: """This constructor is not typically called by the end user.""" self.table = table + def __repr__(self) -> str: + """Print a string representation of the table.""" + return self.table.__repr__() + @property def schema(self) -> pa.Schema: """Returns the schema associated with this table.""" diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 26c3d2e2..4ed465c9 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -496,6 +496,10 @@ class SessionContext: self.ctx = SessionContextInternal(config, runtime) + def __repr__(self) -> str: + """Print a string representation of the Session Context.""" + return self.ctx.__repr__() + @classmethod def global_ctx(cls) -> SessionContext: """Retrieve the global 
context as a `SessionContext` wrapper. diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 9e58873d..e785cab0 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -435,6 +435,20 @@ class Expr: value = pa.scalar(value) return Expr(expr_internal.RawExpr.literal(value)) + @staticmethod + def literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: + """Creates a new expression representing a scalar value with metadata. + + Args: + value: A valid PyArrow scalar value or easily castable to one. + metadata: Metadata to attach to the expression. + """ + if isinstance(value, str): + value = pa.scalar(value, type=pa.string_view()) + value = value if isinstance(value, pa.Scalar) else pa.scalar(value) + + return Expr(expr_internal.RawExpr.literal_with_metadata(value, metadata)) + @staticmethod def string_literal(value: str) -> Expr: """Creates a new expression representing a UTF8 literal value. @@ -1172,6 +1186,10 @@ class WindowFrame: end_bound = end_bound.cast(pa.uint64()) self.window_frame = expr_internal.WindowFrame(units, start_bound, end_bound) + def __repr__(self) -> str: + """Print a string representation of the window frame.""" + return self.window_frame.__repr__() + def get_frame_units(self) -> str: """Returns the window frame units for the bounds.""" return self.window_frame.get_frame_units() diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py index 9ec3679a..dd634c7f 100644 --- a/python/datafusion/user_defined.py +++ b/python/datafusion/user_defined.py @@ -102,6 +102,10 @@ class ScalarUDF: name, func, input_types, return_type, str(volatility) ) + def __repr__(self) -> str: + """Print a string representation of the Scalar UDF.""" + return self._udf.__repr__() + def __call__(self, *args: Expr) -> Expr: """Execute the UDF. 
@@ -268,6 +272,10 @@ class AggregateUDF: str(volatility), ) + def __repr__(self) -> str: + """Print a string representation of the Aggregate UDF.""" + return self._udaf.__repr__() + def __call__(self, *args: Expr) -> Expr: """Execute the UDAF. @@ -604,6 +612,10 @@ class WindowUDF: name, func, input_types, return_type, str(volatility) ) + def __repr__(self) -> str: + """Print a string representation of the Window UDF.""" + return self._udwf.__repr__() + def __call__(self, *args: Expr) -> Expr: """Execute the UDWF. diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index adca783b..40a98dc4 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -19,7 +19,14 @@ from datetime import datetime, timezone import pyarrow as pa import pytest -from datafusion import SessionContext, col, functions, lit +from datafusion import ( + SessionContext, + col, + functions, + lit, + lit_with_metadata, + literal_with_metadata, +) from datafusion.expr import ( Aggregate, AggregateFunction, @@ -103,7 +110,7 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) - assert "Skip: Some(Literal(Int64(5)))" in str(plan) + assert "Skip: Some(Literal(Int64(5), None))" in str(plan) def test_aggregate_query(test_ctx): @@ -824,3 +831,52 @@ def test_expr_functions(ctx, function, expected_result): assert len(result) == 1 assert result[0].column(0).equals(expected_result) + + +def test_literal_metadata(ctx): + result = ( + ctx.from_pydict({"a": [1]}) + .select( + lit(1).alias("no_metadata"), + lit_with_metadata(2, {"key1": "value1"}).alias("lit_with_metadata_fn"), + literal_with_metadata(3, {"key2": "value2"}).alias( + "literal_with_metadata_fn" + ), + ) + .collect() + ) + + expected_schema = pa.schema( + [ + pa.field("no_metadata", pa.int64(), nullable=False), + pa.field( + "lit_with_metadata_fn", + pa.int64(), + nullable=False, + metadata={"key1": "value1"}, + ), + pa.field( + "literal_with_metadata_fn", + pa.int64(), + nullable=False, + 
metadata={"key2": "value2"}, + ), + ] + ) + + expected = pa.RecordBatch.from_pydict( + { + "no_metadata": pa.array([1]), + "lit_with_metadata_fn": pa.array([2]), + "literal_with_metadata_fn": pa.array([3]), + }, + schema=expected_schema, + ) + + assert result[0] == expected + + # Testing result[0].schema == expected_schema does not check each key/value pair + # so we want to explicitly test these + for expected_field in expected_schema: + actual_field = result[0].schema.field(expected_field.name) + assert expected_field.metadata == actual_field.metadata diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index 926a6596..f484cb28 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -28,14 +28,14 @@ except ImportError: from enum import EnumMeta as EnumType -def missing_exports(internal_obj, wrapped_obj) -> None: +def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 """ Identify if any of the rust exposted structs or functions do not have wrappers. 
Special handling for: - Raw* classes: Internal implementation details that shouldn't be exposed - _global_ctx: Internal implementation detail - - __self__, __class__: Python special attributes + - __self__, __class__, __repr__: Python special attributes """ # Special case enums - EnumType overrides a some of the internal functions, # so check all of the values exist and move on @@ -45,6 +45,9 @@ def missing_exports(internal_obj, wrapped_obj) -> None: assert value in dir(wrapped_obj) return + if "__repr__" in internal_obj.__dict__ and "__repr__" not in wrapped_obj.__dict__: + pytest.fail(f"Missing __repr__: {internal_obj.__name__}") + for internal_attr_name in dir(internal_obj): wrapped_attr_name = internal_attr_name.removeprefix("Raw") assert wrapped_attr_name in dir(wrapped_obj) diff --git a/src/context.rs b/src/context.rs index b0af566e..55c92a8f 100644 --- a/src/context.rs +++ b/src/context.rs @@ -61,7 +61,7 @@ use datafusion::datasource::TableProvider; use datafusion::execution::context::{ DataFilePaths, SQLOptions, SessionConfig, SessionContext, TaskContext, }; -use datafusion::execution::disk_manager::DiskManagerConfig; +use datafusion::execution::disk_manager::DiskManagerMode; use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; use datafusion::execution::options::ReadOptions; use datafusion::execution::runtime_env::RuntimeEnvBuilder; @@ -183,22 +183,49 @@ impl PyRuntimeEnvBuilder { } fn with_disk_manager_disabled(&self) -> Self { - let mut builder = self.builder.clone(); - builder = builder.with_disk_manager(DiskManagerConfig::Disabled); - Self { builder } + let mut runtime_builder = self.builder.clone(); + + let mut disk_mgr_builder = runtime_builder + .disk_manager_builder + .clone() + .unwrap_or_default(); + disk_mgr_builder.set_mode(DiskManagerMode::Disabled); + + runtime_builder = runtime_builder.with_disk_manager_builder(disk_mgr_builder); + Self { + builder: runtime_builder, + } } fn 
with_disk_manager_os(&self) -> Self { - let builder = self.builder.clone(); - let builder = builder.with_disk_manager(DiskManagerConfig::NewOs); - Self { builder } + let mut runtime_builder = self.builder.clone(); + + let mut disk_mgr_builder = runtime_builder + .disk_manager_builder + .clone() + .unwrap_or_default(); + disk_mgr_builder.set_mode(DiskManagerMode::OsTmpDirectory); + + runtime_builder = runtime_builder.with_disk_manager_builder(disk_mgr_builder); + Self { + builder: runtime_builder, + } } fn with_disk_manager_specified(&self, paths: Vec<String>) -> Self { - let builder = self.builder.clone(); let paths = paths.iter().map(|s| s.into()).collect(); - let builder = builder.with_disk_manager(DiskManagerConfig::NewSpecified(paths)); - Self { builder } + let mut runtime_builder = self.builder.clone(); + + let mut disk_mgr_builder = runtime_builder + .disk_manager_builder + .clone() + .unwrap_or_default(); + disk_mgr_builder.set_mode(DiskManagerMode::Directories(paths)); + + runtime_builder = runtime_builder.with_disk_manager_builder(disk_mgr_builder); + Self { + builder: runtime_builder, + } } fn with_unbounded_memory_pool(&self) -> Self { diff --git a/src/expr.rs b/src/expr.rs index bc7dbeff..6b1d01d6 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -use datafusion::logical_expr::expr::{AggregateFunctionParams, WindowFunctionParams}; +use datafusion::logical_expr::expr::AggregateFunctionParams; use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ - ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, + lit_with_metadata, ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, }; use pyo3::IntoPyObjectExt; use pyo3::{basic::CompareOp, prelude::*}; @@ -150,7 +150,7 @@ impl PyExpr { Ok(PyScalarVariable::new(data_type, variables).into_bound_py_any(py)?) 
} Expr::Like(value) => Ok(PyLike::from(value.clone()).into_bound_py_any(py)?), - Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_bound_py_any(py)?), + Expr::Literal(value, metadata) => Ok(PyLiteral::new_with_metadata(value.clone(), metadata.clone()).into_bound_py_any(py)?), Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_bound_py_any(py)?), Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_bound_py_any(py)?), Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_bound_py_any(py)?), @@ -282,6 +282,14 @@ impl PyExpr { lit(value.0).into() } + #[staticmethod] + pub fn literal_with_metadata( + value: PyScalarValue, + metadata: HashMap<String, String>, + ) -> PyExpr { + lit_with_metadata(value.0, metadata).into() + } + #[staticmethod] pub fn column(value: &str) -> PyExpr { col(value).into() @@ -377,7 +385,7 @@ impl PyExpr { /// Extracts the Expr value into a PyObject that can be shared with Python pub fn python_value(&self, py: Python) -> PyResult<PyObject> { match &self.expr { - Expr::Literal(scalar_value) => scalar_to_pyarrow(scalar_value, py), + Expr::Literal(scalar_value, _) => scalar_to_pyarrow(scalar_value, py), _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {:?}", &self.expr @@ -417,11 +425,13 @@ impl PyExpr { params: AggregateFunctionParams { args, .. }, .. }) - | Expr::ScalarFunction(ScalarFunction { args, .. }) - | Expr::WindowFunction(WindowFunction { - params: WindowFunctionParams { args, .. }, - .. - }) => Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()), + | Expr::ScalarFunction(ScalarFunction { args, .. 
}) => { + Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) + } + Expr::WindowFunction(boxed_window_fn) => { + let args = &boxed_window_fn.params.args; + Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) + } // Expr(s) that require more specific processing Expr::Case(Case { @@ -600,10 +610,10 @@ impl PyExpr { ) -> PyDataFusionResult<PyExpr> { match &self.expr { Expr::AggregateFunction(agg_fn) => { - let window_fn = Expr::WindowFunction(WindowFunction::new( + let window_fn = Expr::WindowFunction(Box::new(WindowFunction::new( WindowFunctionDefinition::AggregateUDF(agg_fn.func.clone()), agg_fn.params.args.clone(), - )); + ))); add_builder_fns_to_window( window_fn, @@ -743,7 +753,7 @@ impl PyExpr { | Operator::QuestionPipe => Err(py_type_err(format!("Unsupported expr: ${op}"))), }, Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), - Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value), + Expr::Literal(scalar_value, _) => DataTypeMap::map_from_scalar_value(scalar_value), _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {:?}", expr diff --git a/src/expr/literal.rs b/src/expr/literal.rs index a660ac91..45303a10 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -18,11 +18,22 @@ use crate::errors::PyDataFusionError; use datafusion::common::ScalarValue; use pyo3::{prelude::*, IntoPyObjectExt}; +use std::collections::BTreeMap; #[pyclass(name = "Literal", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyLiteral { pub value: ScalarValue, + pub metadata: Option<BTreeMap<String, String>>, +} + +impl PyLiteral { + pub fn new_with_metadata( + value: ScalarValue, + metadata: Option<BTreeMap<String, String>>, + ) -> PyLiteral { + Self { value, metadata } + } } impl From<PyLiteral> for ScalarValue { @@ -33,7 +44,10 @@ impl From<PyLiteral> for ScalarValue { impl From<ScalarValue> for PyLiteral { fn from(value: ScalarValue) -> PyLiteral { - 
PyLiteral { value } + PyLiteral { + value, + metadata: None, + } } } diff --git a/src/expr/window.rs b/src/expr/window.rs index c5467bf9..052d9eeb 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -16,7 +16,6 @@ // under the License. use datafusion::common::{DataFusionError, ScalarValue}; -use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; @@ -118,10 +117,9 @@ impl PyWindowExpr { /// Returns order by columns in a window function expression pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult<Vec<PySortExpr>> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { - params: WindowFunctionParams { order_by, .. }, - .. - }) => py_sort_expr_list(&order_by), + Expr::WindowFunction(boxed_window_fn) => { + py_sort_expr_list(&boxed_window_fn.params.order_by) + } other => Err(not_window_function_err(other)), } } @@ -129,10 +127,9 @@ impl PyWindowExpr { /// Return partition by columns in a window function expression pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult<Vec<PyExpr>> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { - params: WindowFunctionParams { partition_by, .. }, - .. - }) => py_expr_list(&partition_by), + Expr::WindowFunction(boxed_window_fn) => { + py_expr_list(&boxed_window_fn.params.partition_by) + } other => Err(not_window_function_err(other)), } } @@ -140,10 +137,7 @@ impl PyWindowExpr { /// Return input args for window function pub fn get_args(&self, expr: PyExpr) -> PyResult<Vec<PyExpr>> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { - params: WindowFunctionParams { args, .. }, - .. 
- }) => py_expr_list(&args), + Expr::WindowFunction(boxed_window_fn) => py_expr_list(&boxed_window_fn.params.args), other => Err(not_window_function_err(other)), } } @@ -151,7 +145,7 @@ impl PyWindowExpr { /// Return window function name pub fn window_func_name(&self, expr: PyExpr) -> PyResult<String> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { fun, .. }) => Ok(fun.to_string()), + Expr::WindowFunction(boxed_window_fn) => Ok(boxed_window_fn.fun.to_string()), other => Err(not_window_function_err(other)), } } @@ -159,10 +153,9 @@ impl PyWindowExpr { /// Returns a Pywindow frame for a given window function expression pub fn get_frame(&self, expr: PyExpr) -> Option<PyWindowFrame> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { - params: WindowFunctionParams { window_frame, .. }, - .. - }) => Some(window_frame.into()), + Expr::WindowFunction(boxed_window_fn) => { + Some(boxed_window_fn.params.window_frame.into()) + } _ => None, } } diff --git a/src/functions.rs b/src/functions.rs index caa79b8a..b2bafcb6 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -103,7 +103,7 @@ fn array_cat(exprs: Vec<PyExpr>) -> PyExpr { #[pyo3(signature = (array, element, index=None))] fn array_position(array: PyExpr, element: PyExpr, index: Option<i64>) -> PyExpr { let index = ScalarValue::Int64(index); - let index = Expr::Literal(index); + let index = Expr::Literal(index, None); datafusion::functions_nested::expr_fn::array_position(array.into(), element.into(), index) .into() } @@ -334,7 +334,7 @@ fn window( .unwrap_or(WindowFrame::new(order_by.as_ref().map(|v| !v.is_empty()))); Ok(PyExpr { - expr: datafusion::logical_expr::Expr::WindowFunction(WindowFunction { + expr: datafusion::logical_expr::Expr::WindowFunction(Box::new(WindowFunction { fun, params: WindowFunctionParams { args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(), @@ -351,7 +351,7 @@ fn window( window_frame, null_treatment: None, }, - }), + })), }) } diff --git 
a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index 4b4c8659..7fbb1dc2 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -61,7 +61,7 @@ fn extract_scalar_list<'py>( .iter() .map(|expr| match expr { // TODO: should we also leverage `ScalarValue::to_pyarrow` here? - Expr::Literal(v) => match v { + Expr::Literal(v, _) => match v { // The unwraps here are for infallible conversions ScalarValue::Boolean(Some(b)) => Ok(b.into_bound_py_any(py)?), ScalarValue::Int8(Some(i)) => Ok(i.into_bound_py_any(py)?), @@ -106,7 +106,7 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { let op_module = Python::import(py, "operator")?; let pc_expr: PyDataFusionResult<Bound<'_, PyAny>> = match expr { Expr::Column(Column { name, .. }) => Ok(pc.getattr("field")?.call1((name,))?), - Expr::Literal(scalar) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)), + Expr::Literal(scalar, _) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)), Expr::BinaryExpr(BinaryExpr { left, op, right }) => { let operator = operator_to_py(op, &op_module)?; let left = PyArrowFilterExpression::try_from(left.as_ref())?.0; diff --git a/src/udwf.rs b/src/udwf.rs index defd9c52..a0c8cc59 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -300,13 +300,9 @@ impl WindowUDFImpl for MultiColumnWindowUDF { &self.signature } - fn field(&self, field_args: WindowUDFFieldArgs) -> Result<arrow::datatypes::Field> { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result<arrow::datatypes::FieldRef> { // TODO: Should nullable always be `true`? - Ok(arrow::datatypes::Field::new( - field_args.name(), - self.return_type.clone(), - true, - )) + Ok(arrow::datatypes::Field::new(field_args.name(), self.return_type.clone(), true).into()) } // TODO: Enable passing partition_evaluator_args to python? 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@datafusion.apache.org For additional commands, e-mail: commits-help@datafusion.apache.org