Github user stanzhai commented on the issue:
https://github.com/apache/spark/pull/19301
@cenyuhai This is an optimize for physical plan, and your case can be
optimized.
```SQL
select dt,
geohash_of_latlng,
sum(mt_cnt),
sum(ele_cnt),
round(sum(mt_cnt) * 1.0 * 100 / sum(mt_cnt_all), 2),
round(sum(ele_cnt) * 1.0 * 100 / sum(ele_cnt_all), 2)
from values(1, 2, 3, 4, 5, 6) as (dt, geohash_of_latlng, mt_cnt, ele_cnt,
mt_cnt_all, ele_cnt_all)
group by dt, geohash_of_latlng
order by dt, geohash_of_latlng limit 10
```
Before:
```
== Physical Plan ==
TakeOrderedAndProject(limit=10, orderBy=[dt#26 ASC NULLS
FIRST,geohash_of_latlng#27 ASC NULLS FIRST],
output=[dt#26,geohash_of_latlng#27,sum(mt_cnt)#38L,sum(ele_cnt)#39L,round((CAST((CAST((CAST(CAST(sum(CAST(mt_cnt
AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1)))
AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS
DECIMAL(38,2)) / CAST(CAST(sum(CAST(mt_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS
DECIMAL(38,2))), 2)#40,round((CAST((CAST((CAST(CAST(sum(CAST(ele_cnt AS
BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS
DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS
DECIMAL(38,2)) / CAST(CAST(sum(CAST(ele_cnt_all AS BIGINT)) AS DECIMAL(20,0))
AS DECIMAL(38,2))), 2)#41])
+- *HashAggregate(keys=[dt#26, geohash_of_latlng#27],
functions=[sum(cast(mt_cnt#28 as bigint)), sum(cast(ele_cnt#29 as bigint)),
sum(cast(mt_cnt#28 as bigint)), sum(cast(mt_cnt_all#30 as bigint)),
sum(cast(ele_cnt#29 as bigint)), sum(cast(ele_cnt_all#31 as bigint))])
+- Exchange hashpartitioning(dt#26, geohash_of_latlng#27, 200)
+- *HashAggregate(keys=[dt#26, geohash_of_latlng#27],
functions=[partial_sum(cast(mt_cnt#28 as bigint)), partial_sum(cast(ele_cnt#29
as bigint)), partial_sum(cast(mt_cnt#28 as bigint)),
partial_sum(cast(mt_cnt_all#30 as bigint)), partial_sum(cast(ele_cnt#29 as
bigint)), partial_sum(cast(ele_cnt_all#31 as bigint))])
+- LocalTableScan [dt#26, geohash_of_latlng#27, mt_cnt#28,
ele_cnt#29, mt_cnt_all#30, ele_cnt_all#31]
```
After:
```
== Physical Plan ==
TakeOrderedAndProject(limit=10, orderBy=[dt#28 ASC NULLS
FIRST,geohash_of_latlng#29 ASC NULLS FIRST],
output=[dt#28,geohash_of_latlng#29,sum(mt_cnt)#34L,sum(ele_cnt)#35L,round((CAST((CAST((CAST(CAST(sum(CAST(mt_cnt
AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1)))
AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS
DECIMAL(38,2)) / CAST(CAST(sum(CAST(mt_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS
DECIMAL(38,2))), 2)#36,round((CAST((CAST((CAST(CAST(sum(CAST(ele_cnt AS
BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS
DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS
DECIMAL(38,2)) / CAST(CAST(sum(CAST(ele_cnt_all AS BIGINT)) AS DECIMAL(20,0))
AS DECIMAL(38,2))), 2)#37])
+- *HashAggregate(keys=[dt#28, geohash_of_latlng#29],
functions=[sum(cast(mt_cnt#30 as bigint)), sum(cast(ele_cnt#31 as bigint)),
sum(cast(mt_cnt_all#32 as bigint)), sum(cast(ele_cnt_all#33 as bigint))])
+- Exchange hashpartitioning(dt#28, geohash_of_latlng#29, 200)
+- *HashAggregate(keys=[dt#28, geohash_of_latlng#29],
functions=[partial_sum(cast(mt_cnt#30 as bigint)), partial_sum(cast(ele_cnt#31
as bigint)), partial_sum(cast(mt_cnt_all#32 as bigint)),
partial_sum(cast(ele_cnt_all#33 as bigint))])
+- LocalTableScan [dt#28, geohash_of_latlng#29, mt_cnt#30,
ele_cnt#31, mt_cnt_all#32, ele_cnt_all#33]
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]