Github user stanzhai commented on the issue:

https://github.com/apache/spark/pull/19301

@cenyuhai This is an optimization of the physical plan, and your case can be optimized: note the duplicated `sum(cast(mt_cnt#28 as bigint))` and `sum(cast(ele_cnt#29 as bigint))` in the aggregate function lists of the `Before` plan, which are gone in the `After` plan.

```sql
select dt, geohash_of_latlng,
       sum(mt_cnt),
       sum(ele_cnt),
       round(sum(mt_cnt) * 1.0 * 100 / sum(mt_cnt_all), 2),
       round(sum(ele_cnt) * 1.0 * 100 / sum(ele_cnt_all), 2)
from values(1, 2, 3, 4, 5, 6)
  as (dt, geohash_of_latlng, mt_cnt, ele_cnt, mt_cnt_all, ele_cnt_all)
group by dt, geohash_of_latlng
order by dt, geohash_of_latlng
limit 10
```

Before:

```
== Physical Plan ==
TakeOrderedAndProject(limit=10, orderBy=[dt#26 ASC NULLS FIRST,geohash_of_latlng#27 ASC NULLS FIRST], output=[dt#26,geohash_of_latlng#27,sum(mt_cnt)#38L,sum(ele_cnt)#39L,round((CAST((CAST((CAST(CAST(sum(CAST(mt_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(mt_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#40,round((CAST((CAST((CAST(CAST(sum(CAST(ele_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(ele_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#41])
+- *HashAggregate(keys=[dt#26, geohash_of_latlng#27], functions=[sum(cast(mt_cnt#28 as bigint)), sum(cast(ele_cnt#29 as bigint)), sum(cast(mt_cnt#28 as bigint)), sum(cast(mt_cnt_all#30 as bigint)), sum(cast(ele_cnt#29 as bigint)), sum(cast(ele_cnt_all#31 as bigint))])
   +- Exchange hashpartitioning(dt#26, geohash_of_latlng#27, 200)
      +- *HashAggregate(keys=[dt#26, geohash_of_latlng#27], functions=[partial_sum(cast(mt_cnt#28 as bigint)), partial_sum(cast(ele_cnt#29 as bigint)), partial_sum(cast(mt_cnt#28 as bigint)), partial_sum(cast(mt_cnt_all#30 as bigint)), partial_sum(cast(ele_cnt#29 as bigint)), partial_sum(cast(ele_cnt_all#31 as bigint))])
         +- LocalTableScan [dt#26, geohash_of_latlng#27, mt_cnt#28, ele_cnt#29, mt_cnt_all#30, ele_cnt_all#31]
```

After:

```
== Physical Plan ==
TakeOrderedAndProject(limit=10, orderBy=[dt#28 ASC NULLS FIRST,geohash_of_latlng#29 ASC NULLS FIRST], output=[dt#28,geohash_of_latlng#29,sum(mt_cnt)#34L,sum(ele_cnt)#35L,round((CAST((CAST((CAST(CAST(sum(CAST(mt_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(mt_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#36,round((CAST((CAST((CAST(CAST(sum(CAST(ele_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(ele_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#37])
+- *HashAggregate(keys=[dt#28, geohash_of_latlng#29], functions=[sum(cast(mt_cnt#30 as bigint)), sum(cast(ele_cnt#31 as bigint)), sum(cast(mt_cnt_all#32 as bigint)), sum(cast(ele_cnt_all#33 as bigint))])
   +- Exchange hashpartitioning(dt#28, geohash_of_latlng#29, 200)
      +- *HashAggregate(keys=[dt#28, geohash_of_latlng#29], functions=[partial_sum(cast(mt_cnt#30 as bigint)), partial_sum(cast(ele_cnt#31 as bigint)), partial_sum(cast(mt_cnt_all#32 as bigint)), partial_sum(cast(ele_cnt_all#33 as bigint))])
         +- LocalTableScan [dt#28, geohash_of_latlng#29, mt_cnt#30, ele_cnt#31, mt_cnt_all#32, ele_cnt_all#33]
```
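For anyone who wants to reproduce the comparison, a minimal sketch for the `spark-shell` (assuming the default `SparkSession` named `spark` that the shell provides) that prints the physical plan for this query:

```scala
// Build a DataFrame from the same inline-table query used above.
val df = spark.sql("""
  select dt, geohash_of_latlng,
         sum(mt_cnt),
         sum(ele_cnt),
         round(sum(mt_cnt) * 1.0 * 100 / sum(mt_cnt_all), 2),
         round(sum(ele_cnt) * 1.0 * 100 / sum(ele_cnt_all), 2)
  from values(1, 2, 3, 4, 5, 6)
    as (dt, geohash_of_latlng, mt_cnt, ele_cnt, mt_cnt_all, ele_cnt_all)
  group by dt, geohash_of_latlng
  order by dt, geohash_of_latlng
  limit 10
""")

// explain() with no arguments prints only the physical plan.
// With this patch, each HashAggregate should list every sum(...)
// once instead of repeating the duplicated aggregate expressions.
df.explain()
```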