alamb commented on code in PR #16625:
URL: https://github.com/apache/datafusion/pull/16625#discussion_r2176095740
##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -623,8 +668,11 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator {
let mut partition_ordering_values = vec![];
// Existing values should be merged also.
- partition_values.push(self.values.clone().into());
- partition_ordering_values.push(self.ordering_values.clone().into());
+ if !self.is_input_pre_ordered {
Review Comment:
does this have the effect of sorting all inputs even when `ARRAY_AGG` didn't
explicitly call for ordering?
##########
datafusion/sqllogictest/test_files/aggregate.slt:
##########
@@ -232,9 +282,8 @@ physical_plan
01)AggregateExec: mode=Final, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY
[agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]]
02)--CoalescePartitionsExec
03)----AggregateExec: mode=Partial, gby=[], aggr=[array_agg(agg_order.c1)
ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]]
-04)------SortExec: expr=[c2@1 DESC, c3@2 ASC NULLS LAST],
preserve_partitioning=[true]
-05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-06)----------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_agg_multi_order.csv]]},
projection=[c1, c2, c3], file_type=csv, has_header=true
+04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
Review Comment:
I wonder if we should benchmark this -- sorting by `ScalarValue` is likely a
lot less efficient than using the fast array sort / etc that is done in SortExec
##########
datafusion/sqllogictest/test_files/aggregate.slt:
##########
@@ -206,6 +206,56 @@ query error Execution error: In an aggregate with
DISTINCT, ORDER BY expressions
SELECT array_agg(DISTINCT c13 ORDER BY c13, c12)
FROM aggregate_test_100
+query ?? rowsort
+with tbl as (SELECT * FROM (VALUES ('xxx', 'yyy'), ('xxx', 'yyy'), ('xxx2',
'yyy2')) AS t(x, y))
+select
+ array_agg(x order by x) as x_agg,
+ array_agg(y order by y) as y_agg
+from tbl
+group by all
+----
+[xxx, xxx, xxx2] [yyy, yyy, yyy2]
+
+query ??
+SELECT
+ (SELECT array_agg(c12 ORDER BY c12) FROM aggregate_test_100),
+ (SELECT array_agg(c13 ORDER BY c13) FROM aggregate_test_100)
+----
+[0.01479305307777301, 0.02182578039211991, 0.03968347085780355,
0.04429073092078406, 0.047343434291126085, 0.04893135681998029,
0.0494924465469434, 0.05573662213439634, 0.05636955101974106,
0.061029375346466685, 0.07260475960924484, 0.09465635123783445,
0.12357539988406441, 0.152498292971736, 0.16301110515739792,
0.1640882545084913, 0.1754261586710173, 0.17592486905979987,
0.17909035118828576, 0.18628859265874176, 0.19113293583306745,
0.2145232647388039, 0.21535402343780985, 0.24899794314659673,
0.2537253407987472, 0.2667177795079635, 0.27159190516490006,
0.2739938529235548, 0.28534428578703896, 0.2944158618048994, 0.296036538664718,
0.3051364088814128, 0.30585375151301186, 0.3114712539863804,
0.3231750610081745, 0.32869374687050157, 0.33639590659276175,
0.3600766362333053, 0.36936304600612724, 0.38870280983958583,
0.39144436569161134, 0.40342283197779727, 0.4094218353587008,
0.40975383525297016, 0.42073125331890115, 0.4273123318932347,
0.42950521730777025, 0.4830878559436823, 0.508
1765563442366, 0.5437595540422571, 0.5590205548347534, 0.5593249815276734,
0.5603062368164834, 0.560333188635217, 0.5614503754617461, 0.565352842229935,
0.574210838214554, 0.5759450483859969, 0.5773498217058918, 0.5991138115095911,
0.6009475544728957, 0.6108938307533, 0.6316565296547284, 0.6404495093354053,
0.6405262429561641, 0.6425694115212065, 0.658671129040488, 0.6668423897406515,
0.6864391962767343, 0.7035635283169166, 0.7325106678655877, 0.7328050041291218,
0.7614304100703713, 0.7631239070049998, 0.7670021786149205, 0.7697753383420857,
0.7764360990307122, 0.7784918983501654, 0.7973920072996036, 0.819715865079681,
0.8506721053047003, 0.8813167497816289, 0.8824879447595726, 0.9185813970744787,
0.9231889896940375, 0.9237877978193884, 0.9255031346434324, 0.9293883502480845,
0.9294097332465232, 0.9463098243875633, 0.946325164889271, 0.9491397432856566,
0.9567595541247681, 0.9706712283358269, 0.9723580396501548, 0.9748360509016578,
0.9800193410444061, 0.980809631269599, 0.9915178286
51004, 0.9965400387585364] [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm,
0keZ5G8BffGwgF2RwQD59TFzMStxCB, 0og6hSkhbX8AC1ktFS4kounvTzy8Vo,
1aOcrEGd0cOqZe2I5XBOm0nDcwtBZO, 2T3wSlHdEmASmO0xcXHnndkKEt6bz8,
3BEOHQsMEFZ58VcNTOJYShTBpAPzbt, 4HX6feIvmNXBN7XGqgO4YVBkhu8GDI,
4JznSdBajNWhu4hRQwjV1FjTTxY68i, 52mKlRE3aHCBZtjECq6sY9OqVf8Dze,
56MZa5O1hVtX4c5sbnCfxuX5kDChqI, 6FPJlLAcaQ5uokyOWZ9HGdLZObFvOZ,
6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW, 6oIXZuIPIqEoPBvFmbt2Nxy3tryGUE,
6x93sxYioWuq5c9Kkk8oTAAORM7cH0, 802bgTGl6Bk5TlkPYYTxp5JkKyaYUA,
8LIh0b6jmDGm87BmIyjdxNIpX4ugjD, 90gAtmGEeIqUTbo1ZrxCvWtsseukXC,
9UbObCsVkmYpJGcGrgfK90qOnwb2Lj, AFGCj7OWlEB5QfniEFgonMq90Tq5uH,
ALuRhobVWbnQTTWZdSOk0iVe8oYFhW, Amn2K87Db5Es3dFQO9cw9cvpAM6h35,
AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz, BJqx5WokrmrrezZA0dUbleMYkG5U2O,
BPtQMxnuSPpxMExYV9YkDa6cAN7GP3, BsM5ZAYifRh5Lw3Y8X1r53I0cTJnfE,
C2GT5KVyOPZpgKVl110TyZO0NcJ434, DuJNG8tufSqW0ZstHqWj3aGvFLMg4A,
EcCuckwsF3gV1Ecgmh5v4KM8g1ozif, ErJFw6hzZ5fmI5r8bhE4JzlscnhKZU,
F7NSTjWvQJyBburN7CXRUlbgp2dIrA, Fi4rJeTQq
4eXj8Lxg3Hja5hBVTVV5u, H5j5ZHy1FGesOAHjkQEDYCucbpKWRu,
HKSMQ9nTnwXCJIte1JrM1dtYnDtJ8g, IWl0G3ZlMNf7WT8yjIB49cx7MmYOmr,
IZTkHMLvIKuiLjhDjYMmIHxh166we4, Ig1QcuKsjHXkproePdERo2w0mYzIqd,
JHNgc2UCaiXOdmkxwDDyGhRlO0mnBQ, JN0VclewmjwYlSl8386MlWv5rEhWCz,
JafwVLSVk5AVoXFuzclesQ000EE2k1, KJFcmTVjdkCMv94wYCtfHMFhzyRsmH,
Ktb7GQ0N1DrxwkCkEUsTaIXk0xYinn, Ld2ej8NEv5zNcqU60FwpHeZKBhfpiV,
LiEBxds3X0Uw0lxiYjDqrkAaAwoiIW, MXhhH1Var3OzzJCtI9VNyYvA0q8UyJ,
MeSTAXq8gVxVjbEjgkvU9YLte0X9uE, NEhyk8uIx4kEULJGa8qIyFjjBcP2G6,
O66j6PaYuZhEUtqV6fuU7TyjM2WxC5, OF7fQ37GzaZ5ikA2oMyvleKtgnLjXh,
OPwBqCEK5PWTjWaiOyL45u2NLTaDWv, Oq6J4Rx6nde0YlhOIJkFsX2MsSvAQ0,
Ow5PGpfTm4dXCfTDsXAOTatXRoAydR, QEHVvcP8gxI6EMJIrvcnIhgzPNjIvv,
QJYm7YRA3YetcBHI5wkMZeLXVmfuNy, QYlaIAnJA6r8rlAb6f59wcxvcPcWFf,
RilTlL1tKkPOUFuzmLydHAVZwv1OGl, Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH,
TTQUwpMNSXZqVBKAFvXu7OlWvKXJKX, TtDKUZxzVxsq758G6AWPSYuZgVgbcl,
VDhtJkYjAYPykCgOU9x3v7v3t4SO1a, VY0zXmXeksCT8BzvpzpPLbmU9Kp9Y4,
Vp3gmWunM5A7wOC9YW2JroFqTWjvTi, WHmjWk2AY4c6m7
DA4GitUx6nmb1yYS, XemNcT1xp61xcM1Qz3wZ1VECCnq06O,
Z2sWcQr0qyCJRMHDpRy3aQr7PkHtkK, aDxBtor7Icd9C5hnTvvw5NrIre740e,
akiiY5N0I44CMwEnBL6RTBk7BRkxEj, b3b9esRhTzFEawbs6XhpKnD9ojutHB,
bgK1r6v3BCTh0aejJUhkA1Hn6idXGp, cBGc0kSm32ylBDnxogG727C0uhZEYZ,
cq4WSAIFwx3wwTUS5bp1wCe71R6U5I, dVdvo6nUD5FgCgsbOZLds28RyGTpnx,
e2Gh6Ov8XkXoFdJWhl0EjwEHlMDYyG, f9ALCzwDAKmdu7Rk2msJaB1wxe5IBX,
fuyvs0w7WsKSlXqJ1e6HFSoLmx03AG, gTpyQnEODMcpsPnJMZC66gh33i3m0b,
gpo8K5qtYePve6jyPt6xgJx4YOVjms, gxfHWUF8XgY2KdFxigxvNEXe2V2XMl,
i6RQVXKUh7MzuGMDaNclUYnFUAireU, ioEncce3mPOXD2hWhpZpCPWGATG6GU,
jQimhdepw3GKmioWUlVSWeBVRKFkY3, l7uwDoTepWwnAP0ufqtHJS3CRi7RfP,
lqhzgLsXZ8JhtpeeUWWNbMz8PHI705, m6jD0LBIQWaMfenwRCTANI9eOdyyto,
mhjME0zBHbrK6NMkytMTQzOssOa1gF, mzbkwXKrPeZnxg2Kn1LRF5hYSsmksS,
nYVJnVicpGRqKZibHyBAmtmzBXAFfT, oHJMNvWuunsIMIWFnYG31RCfkOo2V7,
oLZ21P2JEDooxV1pU31cIxQHEeeoLu, okOkcWflkNXIy4R8LzmySyY1EC3sYd,
pLk3i59bZwd5KBZrI1FiweYTd5hteG, pTeu0WMjBRTaNRT15rLCuEh3tBJVc5,
qnPOOmslCJaT45buUisMRnM0rc77EK, t6fQUjJejPcjc04wHvH
TPe55S65B4V, ukOiFGGFnQJDHFgZxHMpvhD3zybF0M, ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8,
waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs, wwXqSGKLyBQyPkonlzBNYUJTCo4LRS,
xipQ93429ksjNcXPX5326VSg1xJZcW, y7C453hRWd4E7ImjNDWlpexB8nUqjh,
ydkwycaISlYSlEq3TlkS2m15I2pcp8]
+
+query ??
+SELECT
+ array_agg(c12 ORDER BY c12),
+ array_agg(c13 ORDER BY c13)
+FROM aggregate_test_100
+----
+[0.01479305307777301, 0.02182578039211991, 0.03968347085780355,
0.04429073092078406, 0.047343434291126085, 0.04893135681998029,
0.0494924465469434, 0.05573662213439634, 0.05636955101974106,
0.061029375346466685, 0.07260475960924484, 0.09465635123783445,
0.12357539988406441, 0.152498292971736, 0.16301110515739792,
0.1640882545084913, 0.1754261586710173, 0.17592486905979987,
0.17909035118828576, 0.18628859265874176, 0.19113293583306745,
0.2145232647388039, 0.21535402343780985, 0.24899794314659673,
0.2537253407987472, 0.2667177795079635, 0.27159190516490006,
0.2739938529235548, 0.28534428578703896, 0.2944158618048994, 0.296036538664718,
0.3051364088814128, 0.30585375151301186, 0.3114712539863804,
0.3231750610081745, 0.32869374687050157, 0.33639590659276175,
0.3600766362333053, 0.36936304600612724, 0.38870280983958583,
0.39144436569161134, 0.40342283197779727, 0.4094218353587008,
0.40975383525297016, 0.42073125331890115, 0.4273123318932347,
0.42950521730777025, 0.4830878559436823, 0.508
1765563442366, 0.5437595540422571, 0.5590205548347534, 0.5593249815276734,
0.5603062368164834, 0.560333188635217, 0.5614503754617461, 0.565352842229935,
0.574210838214554, 0.5759450483859969, 0.5773498217058918, 0.5991138115095911,
0.6009475544728957, 0.6108938307533, 0.6316565296547284, 0.6404495093354053,
0.6405262429561641, 0.6425694115212065, 0.658671129040488, 0.6668423897406515,
0.6864391962767343, 0.7035635283169166, 0.7325106678655877, 0.7328050041291218,
0.7614304100703713, 0.7631239070049998, 0.7670021786149205, 0.7697753383420857,
0.7764360990307122, 0.7784918983501654, 0.7973920072996036, 0.819715865079681,
0.8506721053047003, 0.8813167497816289, 0.8824879447595726, 0.9185813970744787,
0.9231889896940375, 0.9237877978193884, 0.9255031346434324, 0.9293883502480845,
0.9294097332465232, 0.9463098243875633, 0.946325164889271, 0.9491397432856566,
0.9567595541247681, 0.9706712283358269, 0.9723580396501548, 0.9748360509016578,
0.9800193410444061, 0.980809631269599, 0.9915178286
51004, 0.9965400387585364] [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm,
0keZ5G8BffGwgF2RwQD59TFzMStxCB, 0og6hSkhbX8AC1ktFS4kounvTzy8Vo,
1aOcrEGd0cOqZe2I5XBOm0nDcwtBZO, 2T3wSlHdEmASmO0xcXHnndkKEt6bz8,
3BEOHQsMEFZ58VcNTOJYShTBpAPzbt, 4HX6feIvmNXBN7XGqgO4YVBkhu8GDI,
4JznSdBajNWhu4hRQwjV1FjTTxY68i, 52mKlRE3aHCBZtjECq6sY9OqVf8Dze,
56MZa5O1hVtX4c5sbnCfxuX5kDChqI, 6FPJlLAcaQ5uokyOWZ9HGdLZObFvOZ,
6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW, 6oIXZuIPIqEoPBvFmbt2Nxy3tryGUE,
6x93sxYioWuq5c9Kkk8oTAAORM7cH0, 802bgTGl6Bk5TlkPYYTxp5JkKyaYUA,
8LIh0b6jmDGm87BmIyjdxNIpX4ugjD, 90gAtmGEeIqUTbo1ZrxCvWtsseukXC,
9UbObCsVkmYpJGcGrgfK90qOnwb2Lj, AFGCj7OWlEB5QfniEFgonMq90Tq5uH,
ALuRhobVWbnQTTWZdSOk0iVe8oYFhW, Amn2K87Db5Es3dFQO9cw9cvpAM6h35,
AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz, BJqx5WokrmrrezZA0dUbleMYkG5U2O,
BPtQMxnuSPpxMExYV9YkDa6cAN7GP3, BsM5ZAYifRh5Lw3Y8X1r53I0cTJnfE,
C2GT5KVyOPZpgKVl110TyZO0NcJ434, DuJNG8tufSqW0ZstHqWj3aGvFLMg4A,
EcCuckwsF3gV1Ecgmh5v4KM8g1ozif, ErJFw6hzZ5fmI5r8bhE4JzlscnhKZU,
F7NSTjWvQJyBburN7CXRUlbgp2dIrA, Fi4rJeTQq
4eXj8Lxg3Hja5hBVTVV5u, H5j5ZHy1FGesOAHjkQEDYCucbpKWRu,
HKSMQ9nTnwXCJIte1JrM1dtYnDtJ8g, IWl0G3ZlMNf7WT8yjIB49cx7MmYOmr,
IZTkHMLvIKuiLjhDjYMmIHxh166we4, Ig1QcuKsjHXkproePdERo2w0mYzIqd,
JHNgc2UCaiXOdmkxwDDyGhRlO0mnBQ, JN0VclewmjwYlSl8386MlWv5rEhWCz,
JafwVLSVk5AVoXFuzclesQ000EE2k1, KJFcmTVjdkCMv94wYCtfHMFhzyRsmH,
Ktb7GQ0N1DrxwkCkEUsTaIXk0xYinn, Ld2ej8NEv5zNcqU60FwpHeZKBhfpiV,
LiEBxds3X0Uw0lxiYjDqrkAaAwoiIW, MXhhH1Var3OzzJCtI9VNyYvA0q8UyJ,
MeSTAXq8gVxVjbEjgkvU9YLte0X9uE, NEhyk8uIx4kEULJGa8qIyFjjBcP2G6,
O66j6PaYuZhEUtqV6fuU7TyjM2WxC5, OF7fQ37GzaZ5ikA2oMyvleKtgnLjXh,
OPwBqCEK5PWTjWaiOyL45u2NLTaDWv, Oq6J4Rx6nde0YlhOIJkFsX2MsSvAQ0,
Ow5PGpfTm4dXCfTDsXAOTatXRoAydR, QEHVvcP8gxI6EMJIrvcnIhgzPNjIvv,
QJYm7YRA3YetcBHI5wkMZeLXVmfuNy, QYlaIAnJA6r8rlAb6f59wcxvcPcWFf,
RilTlL1tKkPOUFuzmLydHAVZwv1OGl, Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH,
TTQUwpMNSXZqVBKAFvXu7OlWvKXJKX, TtDKUZxzVxsq758G6AWPSYuZgVgbcl,
VDhtJkYjAYPykCgOU9x3v7v3t4SO1a, VY0zXmXeksCT8BzvpzpPLbmU9Kp9Y4,
Vp3gmWunM5A7wOC9YW2JroFqTWjvTi, WHmjWk2AY4c6m7
DA4GitUx6nmb1yYS, XemNcT1xp61xcM1Qz3wZ1VECCnq06O,
Z2sWcQr0qyCJRMHDpRy3aQr7PkHtkK, aDxBtor7Icd9C5hnTvvw5NrIre740e,
akiiY5N0I44CMwEnBL6RTBk7BRkxEj, b3b9esRhTzFEawbs6XhpKnD9ojutHB,
bgK1r6v3BCTh0aejJUhkA1Hn6idXGp, cBGc0kSm32ylBDnxogG727C0uhZEYZ,
cq4WSAIFwx3wwTUS5bp1wCe71R6U5I, dVdvo6nUD5FgCgsbOZLds28RyGTpnx,
e2Gh6Ov8XkXoFdJWhl0EjwEHlMDYyG, f9ALCzwDAKmdu7Rk2msJaB1wxe5IBX,
fuyvs0w7WsKSlXqJ1e6HFSoLmx03AG, gTpyQnEODMcpsPnJMZC66gh33i3m0b,
gpo8K5qtYePve6jyPt6xgJx4YOVjms, gxfHWUF8XgY2KdFxigxvNEXe2V2XMl,
i6RQVXKUh7MzuGMDaNclUYnFUAireU, ioEncce3mPOXD2hWhpZpCPWGATG6GU,
jQimhdepw3GKmioWUlVSWeBVRKFkY3, l7uwDoTepWwnAP0ufqtHJS3CRi7RfP,
lqhzgLsXZ8JhtpeeUWWNbMz8PHI705, m6jD0LBIQWaMfenwRCTANI9eOdyyto,
mhjME0zBHbrK6NMkytMTQzOssOa1gF, mzbkwXKrPeZnxg2Kn1LRF5hYSsmksS,
nYVJnVicpGRqKZibHyBAmtmzBXAFfT, oHJMNvWuunsIMIWFnYG31RCfkOo2V7,
oLZ21P2JEDooxV1pU31cIxQHEeeoLu, okOkcWflkNXIy4R8LzmySyY1EC3sYd,
pLk3i59bZwd5KBZrI1FiweYTd5hteG, pTeu0WMjBRTaNRT15rLCuEh3tBJVc5,
qnPOOmslCJaT45buUisMRnM0rc77EK, t6fQUjJejPcjc04wHvH
TPe55S65B4V, ukOiFGGFnQJDHFgZxHMpvhD3zybF0M, ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8,
waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs, wwXqSGKLyBQyPkonlzBNYUJTCo4LRS,
xipQ93429ksjNcXPX5326VSg1xJZcW, y7C453hRWd4E7ImjNDWlpexB8nUqjh,
ydkwycaISlYSlEq3TlkS2m15I2pcp8]
+
+query ?? rowsort
+with tbl as (SELECT * FROM (VALUES ('xxx', 'yyy'), ('xxx', 'yyy'), ('xxx2',
'yyy2')) AS t(x, y))
+select
+ array_agg(distinct x order by x) as x_agg,
Review Comment:
These columns already look distinct to me -- maybe testing with another
column would be better to show that distinct is actually working as designed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]