[ https://issues.apache.org/jira/browse/SPARK-37581?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17684376#comment-17684376 ]
XiDuo You commented on SPARK-37581: ----------------------------------- This should be resovled by SPARK-38138. The root reason is dpp will introduce many subqueries. > sql hang at planning stage > -------------------------- > > Key: SPARK-37581 > URL: https://issues.apache.org/jira/browse/SPARK-37581 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.1.1, 3.2.0 > Reporter: ocean > Priority: Major > > when exec a sql, this sql hang at planning stage. > when disable DPP, sql can finish very quickly. > we can reproduce this problem through example below: > create table test.test_a ( > day string, > week int, > weekday int) > partitioned by ( > dt varchar(8)) > stored as orc; > insert into test.test_a partition (dt=20211126) values('1',1,2); > create table test.test_b ( > session_id string, > device_id string, > brand string, > model string, > wx_version string, > os string, > net_work_type string, > app_id string, > app_name string, > col_z string, > page_url string, > page_title string, > olabel string, > otitle string, > source string, > send_dt string, > recv_dt string, > request_time string, > write_time string, > client_ip string, > col_a string, > dt_hour varchar(12), > product string, > channelfrom string, > customer_um string, > kb_code string, > col_b string, > rectype string, > errcode string, > col_c string, > pageid_merge string) > partitioned by ( > dt varchar(8)) > stored as orc; > insert into test.test_b partition(dt=20211126) > values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2'); > > > drop table if exists test.test_c;create table if not exists test.test_c > stored as ORCFILE as > select calendar.day,calendar.week,calendar.weekday, a_kbs, > b_kbs, c_kbs,d_kbs,e_kbs,f_kbs,g_kbs,h_kbs,i_kbs,j_kbs,k_kbs > from (select * from test.test_a where dt = '20211126') calendar > left join > (select dt,count(distinct kb_code) as a_kbs > from test.test_b > where dt = '20211126' > group by dt) t1 > on calendar.dt = t1.dt > left join > (select dt,count(distinct kb_code) as b_kbs > from test.test_b > where dt = '20211126' > group by dt) t2 > on calendar.dt = t2.dt > left join > (select dt,count(distinct kb_code) as c_kbs > from test.test_b > where dt = '20211126' > group by dt) t3 > on calendar.dt = t3.dt > left join > (select dt,count(distinct kb_code) as d_kbs > from test.test_b > where dt = '20211126' > group by dt) t4 > on calendar.dt = t4.dt > left join > (select dt,count(distinct kb_code) as e_kbs > from test.test_b > where dt = '20211126' > group by dt) t5 > on calendar.dt = t5.dt > left join > (select dt,count(distinct kb_code) as f_kbs > from test.test_b > where dt = '20211126' > group by dt) t6 > on calendar.dt = t6.dt > left join > (select dt,count(distinct kb_code) as g_kbs > from test.test_b > where dt = '20211126' > group by dt) t7 > on calendar.dt = t7.dt > left join > (select dt,count(distinct kb_code) as h_kbs > from test.test_b > where dt = '20211126' > group by dt) t8 > on calendar.dt = t8.dt > left join > (select dt,count(distinct kb_code) as i_kbs > from test.test_b > where dt = '20211126' > group by dt) t9 > on calendar.dt = t9.dt > left join > (select dt,count(distinct kb_code) as j_kbs > from test.test_b > where dt = '20211126' > group by dt) t10 > on calendar.dt = t10.dt > left join > (select dt,count(distinct kb_code) as k_kbs > from test.test_b > where dt = '20211126' > group by dt) t11 > on calendar.dt = t11.dt > -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org