This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7638a26979 Introduce FunctionRegistry dependency to optimize and rewrite rule (#10714)
7638a26979 is described below
commit 7638a26979382f98fe9725a75424acf0788ff26a
Author: Jay Zhan <[email protected]>
AuthorDate: Sat Jun 1 09:49:10 2024 +0800
Introduce FunctionRegistry dependency to optimize and rewrite rule (#10714)
* mv function registry to expr
Signed-off-by: jayzhan211 <[email protected]>
* registry move to config trait
Signed-off-by: jayzhan211 <[email protected]>
* fix test
Signed-off-by: jayzhan211 <[email protected]>
* fix test
Signed-off-by: jayzhan211 <[email protected]>
* rm dependency
Signed-off-by: jayzhan211 <[email protected]>
* fix cli cargo lock
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
datafusion-cli/Cargo.lock | 160 ++++++++++-----------
datafusion/core/src/execution/context/mod.rs | 4 +
datafusion/execution/src/lib.rs | 7 +-
datafusion/expr/src/lib.rs | 1 +
datafusion/{execution => expr}/src/registry.rs | 4 +-
datafusion/optimizer/Cargo.toml | 1 -
datafusion/optimizer/src/optimizer.rs | 5 +
.../optimizer/src/replace_distinct_aggregate.rs | 69 ++-------
datafusion/sqllogictest/test_files/distinct_on.slt | 36 +++++
9 files changed, 147 insertions(+), 140 deletions(-)
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 62b6ad287a..6a1ba8aba0 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
[[package]]
name = "addr2line"
-version = "0.21.0"
+version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
dependencies = [
"gimli",
]
@@ -363,9 +363,9 @@ dependencies = [
[[package]]
name = "async-compression"
-version = "0.4.9"
+version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e9eabd7a98fe442131a17c316bd9349c43695e49e730c3c8e12cfb5f4da2693"
+checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5"
dependencies = [
"bzip2",
"flate2",
@@ -387,7 +387,7 @@ checksum =
"c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -708,9 +708,9 @@ dependencies = [
[[package]]
name = "backtrace"
-version = "0.3.71"
+version = "0.3.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d"
+checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11"
dependencies = [
"addr2line",
"cc",
@@ -869,9 +869,9 @@ dependencies = [
[[package]]
name = "cc"
-version = "1.0.97"
+version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4"
+checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
dependencies = [
"jobserver",
"libc",
@@ -1042,9 +1042,9 @@ dependencies = [
[[package]]
name = "crc32fast"
-version = "1.4.0"
+version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
+checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
@@ -1093,7 +1093,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f"
dependencies = [
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -1325,7 +1325,6 @@ dependencies = [
"chrono",
"datafusion-common",
"datafusion-expr",
- "datafusion-functions-aggregate",
"datafusion-physical-expr",
"hashbrown 0.14.5",
"indexmap 2.2.6",
@@ -1495,9 +1494,9 @@ checksum =
"fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "either"
-version = "1.11.0"
+version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
+checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
[[package]]
name = "encoding_rs"
@@ -1535,9 +1534,9 @@ checksum =
"5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
-version = "0.3.8"
+version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
+checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
dependencies = [
"libc",
"windows-sys 0.52.0",
@@ -1685,7 +1684,7 @@ checksum =
"87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -1747,9 +1746,9 @@ dependencies = [
[[package]]
name = "gimli"
-version = "0.28.1"
+version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
+checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
[[package]]
name = "glob"
@@ -1987,9 +1986,9 @@ dependencies = [
[[package]]
name = "instant"
-version = "0.1.12"
+version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
+checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
"js-sys",
@@ -2114,9 +2113,9 @@ dependencies = [
[[package]]
name = "libc"
-version = "0.2.154"
+version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "libflate"
@@ -2150,9 +2149,9 @@ checksum =
"4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "libmimalloc-sys"
-version = "0.1.37"
+version = "0.1.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81eb4061c0582dedea1cbc7aff2240300dd6982e0239d1c99e65c1dbf4a30ba7"
+checksum = "0e7bb23d733dfcc8af652a78b7bf232f0e967710d044732185e561e47c0336b6"
dependencies = [
"cc",
"libc",
@@ -2170,9 +2169,9 @@ dependencies = [
[[package]]
name = "linux-raw-sys"
-version = "0.4.13"
+version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
+checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "lock_api"
@@ -2228,9 +2227,9 @@ checksum =
"6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
[[package]]
name = "mimalloc"
-version = "0.1.41"
+version = "0.1.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f41a2280ded0da56c8cf898babb86e8f10651a34adcfff190ae9a1159c6908d"
+checksum = "e9186d86b79b52f4a77af65604b51225e8db1d6ee7e3f41aec1e40829c71a176"
dependencies = [
"libmimalloc-sys",
]
@@ -2243,9 +2242,9 @@ checksum =
"6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "miniz_oxide"
-version = "0.7.2"
+version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
+checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae"
dependencies = [
"adler",
]
@@ -2289,9 +2288,9 @@ checksum =
"61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
[[package]]
name = "num"
-version = "0.4.2"
+version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41"
+checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
@@ -2313,9 +2312,9 @@ dependencies = [
[[package]]
name = "num-complex"
-version = "0.4.5"
+version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
dependencies = [
"num-traits",
]
@@ -2348,11 +2347,10 @@ dependencies = [
[[package]]
name = "num-rational"
-version = "0.4.1"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
- "autocfg",
"num-bigint",
"num-integer",
"num-traits",
@@ -2380,9 +2378,9 @@ dependencies = [
[[package]]
name = "object"
-version = "0.32.2"
+version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
+checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
dependencies = [
"memchr",
]
@@ -2453,9 +2451,9 @@ checksum =
"4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
[[package]]
name = "parking_lot"
-version = "0.12.2"
+version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb"
+checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
@@ -2532,9 +2530,9 @@ checksum =
"e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "petgraph"
-version = "0.6.4"
+version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9"
+checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"indexmap 2.2.6",
@@ -2595,7 +2593,7 @@ checksum =
"2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -2684,9 +2682,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
-version = "1.0.82"
+version = "1.0.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b"
+checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6"
dependencies = [
"unicode-ident",
]
@@ -3001,9 +2999,9 @@ dependencies = [
[[package]]
name = "rustls-pki-types"
-version = "1.6.0"
+version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51f344d206c5e1b010eec27349b815a4805f70a778895959d70b74b9b529b30a"
+checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d"
[[package]]
name = "rustls-webpki"
@@ -3017,9 +3015,9 @@ dependencies = [
[[package]]
name = "rustversion"
-version = "1.0.16"
+version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "092474d1a01ea8278f69e6a358998405fae5b8b963ddaeb2b0b04a128bf1dfb0"
+checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
[[package]]
name = "rustyline"
@@ -3121,29 +3119,29 @@ checksum =
"a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
-version = "1.0.200"
+version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddc6f9cc94d67c0e21aaf7eda3a010fd3af78ebf6e096aa6e2e13c79749cce4f"
+checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
-version = "1.0.200"
+version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "856f046b9400cee3c8c94ed572ecdb752444c24528c035cd35882aad6f492bcb"
+checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
name = "serde_json"
-version = "1.0.116"
+version = "1.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813"
+checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3"
dependencies = [
"itoa",
"ryu",
@@ -3271,7 +3269,7 @@ checksum =
"01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -3317,7 +3315,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -3330,7 +3328,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -3352,9 +3350,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.61"
+version = "2.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c993ed8ccba56ae856363b1845da7266a7cb78e1d146c8a32d54b45a8b831fc9"
+checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
dependencies = [
"proc-macro2",
"quote",
@@ -3423,22 +3421,22 @@ checksum =
"23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"
[[package]]
name = "thiserror"
-version = "1.0.60"
+version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18"
+checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
-version = "1.0.60"
+version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524"
+checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -3508,9 +3506,9 @@ checksum =
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
-version = "1.37.0"
+version = "1.38.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787"
+checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a"
dependencies = [
"backtrace",
"bytes",
@@ -3527,13 +3525,13 @@ dependencies = [
[[package]]
name = "tokio-macros"
-version = "2.2.0"
+version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
+checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -3629,7 +3627,7 @@ checksum =
"34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -3674,7 +3672,7 @@ checksum =
"f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
@@ -3828,7 +3826,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
"wasm-bindgen-shared",
]
@@ -3862,7 +3860,7 @@ checksum =
"e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -4127,14 +4125,14 @@ checksum =
"15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.61",
+ "syn 2.0.66",
]
[[package]]
name = "zeroize"
-version = "1.7.0"
+version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
+checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
[[package]]
name = "zstd"
diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs
index cb0dfd0791..745eff550f 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -2350,6 +2350,10 @@ impl OptimizerConfig for SessionState {
fn options(&self) -> &ConfigOptions {
self.config_options()
}
+
+ fn function_registry(&self) -> Option<&dyn FunctionRegistry> {
+ Some(self)
+ }
}
/// Create a new task context instance from SessionContext
diff --git a/datafusion/execution/src/lib.rs b/datafusion/execution/src/lib.rs
index a1a1551c2c..2fe0d83b1d 100644
--- a/datafusion/execution/src/lib.rs
+++ b/datafusion/execution/src/lib.rs
@@ -22,11 +22,16 @@ pub mod config;
pub mod disk_manager;
pub mod memory_pool;
pub mod object_store;
-pub mod registry;
pub mod runtime_env;
mod stream;
mod task;
+pub mod registry {
+ pub use datafusion_expr::registry::{
+ FunctionRegistry, MemoryFunctionRegistry, SerializerRegistry,
+ };
+}
+
pub use disk_manager::DiskManager;
pub use registry::FunctionRegistry;
pub use stream::{RecordBatchStream, SendableRecordBatchStream};
diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index d0114a4725..74d6b4149d 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -48,6 +48,7 @@ pub mod function;
pub mod groups_accumulator;
pub mod interval_arithmetic;
pub mod logical_plan;
+pub mod registry;
pub mod simplify;
pub mod sort_properties;
pub mod tree_node;
diff --git a/datafusion/execution/src/registry.rs b/datafusion/expr/src/registry.rs
similarity index 98%
rename from datafusion/execution/src/registry.rs
rename to datafusion/expr/src/registry.rs
index f3714a11c2..70d0a21a87 100644
--- a/datafusion/execution/src/registry.rs
+++ b/datafusion/expr/src/registry.rs
@@ -17,9 +17,9 @@
//! FunctionRegistry trait
+use crate::expr_rewriter::FunctionRewrite;
+use crate::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
use datafusion_common::{not_impl_err, plan_datafusion_err, Result};
-use datafusion_expr::expr_rewriter::FunctionRewrite;
-use datafusion_expr::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
use std::collections::HashMap;
use std::{collections::HashSet, sync::Arc};
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml
index 59c0b476c7..67d5c9b23b 100644
--- a/datafusion/optimizer/Cargo.toml
+++ b/datafusion/optimizer/Cargo.toml
@@ -45,7 +45,6 @@ async-trait = { workspace = true }
chrono = { workspace = true }
datafusion-common = { workspace = true, default-features = true }
datafusion-expr = { workspace = true }
-datafusion-functions-aggregate = { workspace = true }
datafusion-physical-expr = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 0501f5b8a4..998eeb7167 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -21,6 +21,7 @@ use std::collections::HashSet;
use std::sync::Arc;
use chrono::{DateTime, Utc};
+use datafusion_expr::registry::FunctionRegistry;
use log::{debug, warn};
use datafusion_common::alias::AliasGenerator;
@@ -122,6 +123,10 @@ pub trait OptimizerConfig {
fn alias_generator(&self) -> Arc<AliasGenerator>;
fn options(&self) -> &ConfigOptions;
+
+ fn function_registry(&self) -> Option<&dyn FunctionRegistry> {
+ None
+ }
}
/// A standalone [`OptimizerConfig`] that can be used independently
diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs
index dcd13c58b9..752e2b2007 100644
--- a/datafusion/optimizer/src/replace_distinct_aggregate.rs
+++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs
@@ -21,11 +21,11 @@ use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::tree_node::Transformed;
use datafusion_common::{internal_err, Column, Result};
+use datafusion_expr::expr::AggregateFunction;
use datafusion_expr::expr_rewriter::normalize_cols;
use datafusion_expr::utils::expand_wildcard;
use datafusion_expr::{col, LogicalPlanBuilder};
use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan};
-use datafusion_functions_aggregate::first_last::first_value;
/// Optimizer that replaces logical [[Distinct]] with a logical [[Aggregate]]
///
@@ -73,7 +73,7 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
fn rewrite(
&self,
plan: LogicalPlan,
- _config: &dyn OptimizerConfig,
+ config: &dyn OptimizerConfig,
) -> Result<Transformed<LogicalPlan>> {
match plan {
LogicalPlan::Distinct(Distinct::All(input)) => {
@@ -95,9 +95,18 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
let expr_cnt = on_expr.len();
// Construct the aggregation expression to be used to fetch the selected expressions.
- let aggr_expr = select_expr
- .into_iter()
- .map(|e| first_value(vec![e], false, None, sort_expr.clone(), None));
+ let first_value_udaf =
+ config.function_registry().unwrap().udaf("first_value")?;
+ let aggr_expr = select_expr.into_iter().map(|e| {
+ Expr::AggregateFunction(AggregateFunction::new_udf(
+ first_value_udaf.clone(),
+ vec![e],
+ false,
+ None,
+ sort_expr.clone(),
+ None,
+ ))
+ });
let aggr_expr = normalize_cols(aggr_expr, input.as_ref())?;
let group_expr = normalize_cols(on_expr, input.as_ref())?;
@@ -163,53 +172,3 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
Some(BottomUp)
}
}
-
-#[cfg(test)]
-mod tests {
- use crate::replace_distinct_aggregate::ReplaceDistinctWithAggregate;
- use crate::test::{assert_optimized_plan_eq, test_table_scan};
- use datafusion_expr::{col, LogicalPlanBuilder};
- use std::sync::Arc;
-
- #[test]
- fn replace_distinct() -> datafusion_common::Result<()> {
- let table_scan = test_table_scan().unwrap();
- let plan = LogicalPlanBuilder::from(table_scan)
- .project(vec![col("a"), col("b")])?
- .distinct()?
- .build()?;
-
- let expected = "Aggregate: groupBy=[[test.a, test.b]], aggr=[[]]\
- \n Projection: test.a, test.b\
- \n TableScan: test";
-
- assert_optimized_plan_eq(
- Arc::new(ReplaceDistinctWithAggregate::new()),
- plan,
- expected,
- )
- }
-
- #[test]
- fn replace_distinct_on() -> datafusion_common::Result<()> {
- let table_scan = test_table_scan().unwrap();
- let plan = LogicalPlanBuilder::from(table_scan)
- .distinct_on(
- vec![col("a")],
- vec![col("b")],
- Some(vec![col("a").sort(false, true), col("c").sort(true, false)]),
- )?
- .build()?;
-
- let expected = "Projection: first_value(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST] AS b\
- \n Sort: test.a DESC NULLS FIRST\
- \n Aggregate: groupBy=[[test.a]], aggr=[[first_value(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST]]]\
- \n TableScan: test";
-
- assert_optimized_plan_eq(
- Arc::new(ReplaceDistinctWithAggregate::new()),
- plan,
- expected,
- )
- }
-}
diff --git a/datafusion/sqllogictest/test_files/distinct_on.slt b/datafusion/sqllogictest/test_files/distinct_on.slt
index beef865bca..99639d78c3 100644
--- a/datafusion/sqllogictest/test_files/distinct_on.slt
+++ b/datafusion/sqllogictest/test_files/distinct_on.slt
@@ -143,3 +143,39 @@ LIMIT 3;
-25 15295
45 15673
-72 -11122
+
+# test distinct on
+statement ok
+create table t(a int, b int, c int) as values (1, 2, 3);
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+query TT
+explain select distinct on (a) b from t order by a desc, c;
+----
+logical_plan
+01)Projection: first_value(t.b) ORDER BY [t.a DESC NULLS FIRST, t.c ASC NULLS LAST] AS b
+02)--Sort: t.a DESC NULLS FIRST
+03)----Aggregate: groupBy=[[t.a]], aggr=[[first_value(t.b) ORDER BY [t.a DESC NULLS FIRST, t.c ASC NULLS LAST]]]
+04)------TableScan: t projection=[a, b, c]
+
+statement ok
+drop table t;
+
+# test distinct
+statement ok
+create table t(a int, b int) as values (1, 2);
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+query TT
+explain select distinct a, b from t;
+----
+logical_plan
+01)Aggregate: groupBy=[[t.a, t.b]], aggr=[[]]
+02)--TableScan: t projection=[a, b]
+
+statement ok
+drop table t;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]