zosimer opened a new issue #3866:
URL: https://github.com/apache/incubator-dolphinscheduler/issues/3866
When DolphinScheduler connects to the Spark Thrift Server to execute
SQL and the task sets `spark.sql.adaptive.enabled = true`, the worker log
reports "spark.sql.adaptive.enabled should be boolean, but was true;",
even though I did set a boolean value.
### **sql:**
set spark.sql.adaptive.enabled = true;
set spark.sql.adaptive.shuffle.targetPostShuffleInputSize = 134217728;
set spark.sql.adaptive.join.enabled= true;
set spark.sql.autoBroadcastJoinThreshold = 20971520;
set spark.sql.hive.mergeFiles = true;
INSERT overwrite TABLE ods.ods_app_report_vo
partition (`month`='$[yyyy-MM-1]',`day`='$[yyyy-MM-dd-1]')
select
`_id` ,
`hid` ,
`coid`,
`csid`,
`eqid`,
`type`,
`timestamp`,
`date`,
`status`,
`roomid`,
`errmsg`,
`_class`
from ods.ods_app_report_vo_day as a
where a.`day` = '$[yyyy-MM-1]' distribute by rand();
### **Worker logs:**
[INFO] 2020-09-30 23:31:16.902 - [taskAppId=TASK-6-43-218]:[499] - after
replace sql , preparing : set spark.sql.adaptive.enabled=true;
set spark.sql.adaptive.shuffle.targetPostShuffleInputSize=134217728;
set spark.sql.adaptive.join.enabled=true;
set spark.sql.autoBroadcastJoinThreshold=20971520;
set spark.sql.hive.mergeFiles=true;
insert overwrite table ads.ads_community_equipment_house_view
partition (`month`='2020-09',`day`='2020-09-29')
select a.*,NVL(b.app_num,0) app_num
from (select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as house_num,
COUNT(distinct A.CO_ID) as community_num,
COUNT(distinct A.EQ_AutoID) as equipment_num
--'2020-09' as month
from dws.dws_community_equipment_room_house_wide a where 1=1
-- and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and
a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) a left join
(select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as app_num
from dws.dws_community_equipment_room_house_wide a where A.H_LoginType='1'
--and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and
a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) b on a.proince_id=b.proince_id and a.city_id=b.city_id and
a.county_id=b.county_id and a.CO_ID=b.CO_ID distribute by rand();
[INFO] 2020-09-30 23:31:16.904 - [taskAppId=TASK-6-43-218]:[504] - Sql
Params are replaced sql , parameters:
[INFO] 2020-09-30 23:31:16.904 - [taskAppId=TASK-6-43-218]:[52] - can't
find udf function resource
[INFO] 2020-09-30 23:31:16.905 org.apache.hive.jdbc.Utils:[318] - Supplied
authorities: 192.168.2.139:10000
[INFO] 2020-09-30 23:31:16.905 org.apache.hive.jdbc.Utils:[437] - Resolved
authority: 192.168.2.139:10000
[INFO] 2020-09-30 23:31:16.945 - [taskAppId=TASK-6-43-218]:[418] - prepare
statement replace sql : org.apache.hive.jdbc.HivePreparedStatement@29842163
[ERROR] 2020-09-30 23:31:16.952 - [taskAppId=TASK-6-43-218]:[242] - execute
sql error
java.sql.SQLException: java.lang.IllegalArgumentException:
spark.sql.adaptive.enabled should be boolean, but was true;
set spark.sql.adaptive.shuffle.targetPostShuffleInputSize=134217728;
set spark.sql.adaptive.join.enabled=true;
set spark.sql.autoBroadcastJoinThreshold=20971520;
set spark.sql.hive.mergeFiles=true;
insert overwrite table ads.ads_community_equipment_house_view
partition (`month`='2020-09',`day`='2020-09-29')
select a.*,NVL(b.app_num,0) app_num
from (select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as house_num,
COUNT(distinct A.CO_ID) as community_num,
COUNT(distinct A.EQ_AutoID) as equipment_num
--'2020-09' as month
from dws.dws_community_equipment_room_house_wide a where 1=1
-- and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and
a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) a left join
(select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as app_num
from dws.dws_community_equipment_room_house_wide a where A.H_LoginType='1'
--and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and
a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) b on a.proince_id=b.proince_id and a.city_id=b.city_id and
a.county_id=b.county_id and a.CO_ID=b.CO_ID distribute by rand();
at
org.apache.hive.jdbc.HiveStatement.waitForOperationToComplete(HiveStatement.java:348)
at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:251)
at
org.apache.hive.jdbc.HiveStatement.executeUpdate(HiveStatement.java:448)
at
org.apache.hive.jdbc.HivePreparedStatement.executeUpdate(HivePreparedStatement.java:119)
at
org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.executeFuncAndSql(SqlTask.java:236)
at
org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.handle(SqlTask.java:139)
at
org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread.run(TaskExecuteThread.java:129)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[ERROR] 2020-09-30 23:31:16.962 - [taskAppId=TASK-6-43-218]:[145] - sql
task error
java.lang.RuntimeException: execute sql error
at
org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.executeFuncAndSql(SqlTask.java:243)
at
org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.handle(SqlTask.java:139)
at
org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread.run(TaskExecuteThread.java:129)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[ERROR] 2020-09-30 23:31:16.964
org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread:[140] - task
scheduler failure
java.lang.RuntimeException: execute sql error
at
org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.executeFuncAndSql(SqlTask.java:243)
at
org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.handle(SqlTask.java:139)
at
org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread.run(TaskExecuteThread.java:129)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]